Posted to commits@lucene.apache.org by sh...@apache.org on 2010/10/20 15:05:16 UTC

svn commit: r1025544 - in /lucene/dev/branches/branch_3x/lucene: ./ contrib/misc/src/java/org/apache/lucene/index/ src/java/org/apache/lucene/index/ src/test/org/apache/lucene/index/

Author: shaie
Date: Wed Oct 20 13:05:15 2010
New Revision: 1025544

URL: http://svn.apache.org/viewvc?rev=1025544&view=rev
Log:
LUCENE-2701: Factor maxMergeSize into findMergesForOptimize in LogMergePolicy

Added:
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java   (with props)
Modified:
    lucene/dev/branches/branch_3x/lucene/CHANGES.txt
    lucene/dev/branches/branch_3x/lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/MergePolicy.java

Modified: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/CHANGES.txt?rev=1025544&r1=1025543&r2=1025544&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/CHANGES.txt Wed Oct 20 13:05:15 2010
@@ -100,6 +100,10 @@ Changes in runtime behavior
   test lock just before the real lock is acquired.  (Surinder Pal
   Singh Bindra via Mike McCandless)
 
+* LUCENE-2701: maxMergeMB and maxMergeDocs constraints set on LogMergePolicy now
+  affect optimize() as well (not only regular merges). This means that you can
+  run optimize() and segments that are too large won't be merged. (Shai Erera)
+
 API Changes
 
 * LUCENE-2076: Rename FSDirectory.getFile -> getDirectory.  (George

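In practice, the new behavior is driven by the existing LogMergePolicy setters. A minimal sketch (not part of this commit; it assumes the usual imports from org.apache.lucene.index, org.apache.lucene.analysis, org.apache.lucene.store and org.apache.lucene.util, an existing Directory, and an illustrative 512 MB threshold):

    // Minimal sketch: dir, the analyzer and the threshold are assumptions.
    void sizeBoundedOptimize(Directory dir) throws IOException {
      IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_CURRENT,
          new WhitespaceAnalyzer());
      LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
      mp.setMaxMergeMB(512.0);  // segments larger than this are never merged
      conf.setMergePolicy(mp);
      IndexWriter writer = new IndexWriter(dir, conf);
      writer.optimize();        // now leaves oversized segments alone as well
      writer.close();
    }
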
Modified: lucene/dev/branches/branch_3x/lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java?rev=1025544&r1=1025543&r2=1025544&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java Wed Oct 20 13:05:15 2010
@@ -103,31 +103,6 @@ public class BalancedSegmentMergePolicy 
     }
   }
   
-  private boolean isOptimized(SegmentInfos infos, IndexWriter writer, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
-    final int numSegments = infos.size();
-    int numToOptimize = 0;
-    SegmentInfo optimizeInfo = null;
-    for(int i=0;i<numSegments && numToOptimize <= maxNumSegments;i++) {
-      final SegmentInfo info = infos.info(i);
-      if (segmentsToOptimize.contains(info)) {
-        numToOptimize++;
-        optimizeInfo = info;
-      }
-    }
-
-    return numToOptimize <= maxNumSegments &&
-      (numToOptimize != 1 || isOptimized(writer, optimizeInfo));
-  }
-  
-  private boolean isOptimized(IndexWriter writer, SegmentInfo info)
-    throws IOException {
-    assert writer != null;
-    return !info.hasDeletions() &&
-      !info.hasSeparateNorms() &&
-      info.dir == writer.getDirectory() &&
-      info.getUseCompoundFile() == getUseCompoundFile();
-  }
-
   @Override
   public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
     
@@ -135,7 +110,7 @@ public class BalancedSegmentMergePolicy 
 
     MergeSpecification spec = null;
 
-    if (!isOptimized(infos, writer.get(), maxNumSegments, segmentsToOptimize)) {
+    if (!isOptimized(infos, maxNumSegments, segmentsToOptimize)) {
 
       // Find the newest (rightmost) segment that needs to
       // be optimized (other segments may have been flushed
@@ -158,7 +133,7 @@ public class BalancedSegmentMergePolicy 
           // Since we must optimize down to 1 segment, the
           // choice is simple:
           boolean useCompoundFile = getUseCompoundFile();
-          if (last > 1 || !isOptimized(writer.get(), infos.info(0))) {
+          if (last > 1 || !isOptimized(infos.info(0))) {
 
             spec = new MergeSpecification();
             spec.add(new OneMerge(infos.range(0, last), useCompoundFile));

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1025544&r1=1025543&r2=1025544&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Wed Oct 20 13:05:15 2010
@@ -54,16 +54,16 @@ public abstract class LogMergePolicy ext
    *  or larger will never be merged.  @see setMaxMergeDocs */
   public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;
 
-  private int mergeFactor = DEFAULT_MERGE_FACTOR;
+  protected int mergeFactor = DEFAULT_MERGE_FACTOR;
 
-  long minMergeSize;
-  long maxMergeSize;
-  int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
+  protected long minMergeSize;
+  protected long maxMergeSize;
+  protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
 
   protected boolean calibrateSizeByDeletes = true;
   
-  private boolean useCompoundFile = true;
-  private boolean useCompoundDocStore = true;
+  protected boolean useCompoundFile = true;
+  protected boolean useCompoundDocStore = true;
 
   public LogMergePolicy() {
     super();
@@ -74,7 +74,7 @@ public abstract class LogMergePolicy ext
     return w != null && w.verbose();
   }
   
-  private void message(String message) {
+  protected void message(String message) {
     if (verbose())
       writer.get().message("LMP: " + message);
   }
@@ -180,7 +180,7 @@ public abstract class LogMergePolicy ext
     }
   }
   
-  private boolean isOptimized(SegmentInfos infos, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
+  protected boolean isOptimized(SegmentInfos infos, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
     final int numSegments = infos.size();
     int numToOptimize = 0;
     SegmentInfo optimizeInfo = null;
@@ -199,7 +199,7 @@ public abstract class LogMergePolicy ext
   /** Returns true if this single info is optimized (has no
    *  pending norms or deletes, is in the same dir as the
    *  writer, and matches the current compound file setting */
-  private boolean isOptimized(SegmentInfo info)
+  protected boolean isOptimized(SegmentInfo info)
     throws IOException {
     IndexWriter w = writer.get();
     assert w != null;
@@ -210,6 +210,103 @@ public abstract class LogMergePolicy ext
       info.getUseCompoundFile() == useCompoundFile;
   }
 
+  /**
+   * Returns the merges necessary to optimize the index, taking the max merge
+   * size or max merge docs into consideration. This method attempts to respect
+   * the {@code maxNumSegments} parameter; however, due to the size
+   * constraints, more than that number of segments may remain in the
+   * index. In other words, {@code maxNumSegments} is treated as a target,
+   * not a guarantee.
+   */
+  private MergeSpecification findMergesForOptimizeSizeLimit(
+      SegmentInfos infos, int maxNumSegments, int last) throws IOException {
+    MergeSpecification spec = new MergeSpecification();
+
+    int start = last - 1;
+    while (start >= 0) {
+      SegmentInfo info = infos.info(start);
+      if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) {
+        // skip this segment and add a merge for the segments to its right,
+        // unless there is only one of them and it is already optimized.
+        if (last - start - 1 > 1 || (start != last - 1 && !isOptimized(infos.info(start + 1)))) {
+          // there is more than 1 segment to the right of this one, or an unoptimized single segment.
+          spec.add(new OneMerge(infos.range(start + 1, last), useCompoundFile));
+        }
+        last = start;
+      } else if (last - start == mergeFactor) {
+        // mergeFactor eligible segments were found, add them as a merge.
+        spec.add(new OneMerge(infos.range(start, last), useCompoundFile));
+        last = start;
+      }
+      --start;
+    }
+
+    // Add any left-over segments, unless there is just 1 already optimized.
+    if (last > 0 && (++start + 1 < last || !isOptimized(infos.info(start)))) {
+      spec.add(new OneMerge(infos.range(start, last), useCompoundFile));
+    }
+
+    return spec.merges.size() == 0 ? null : spec;
+  }
+  
+  /**
+   * Returns the merges necessary to optimize the index. This method constrains
+   * the returned merges only by the {@code maxNumSegments} parameter, and
+   * guarantees that exactly that number of segments will remain in the index.
+   */
+  private MergeSpecification findMergesForOptimizeMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last) throws IOException {
+    MergeSpecification spec = new MergeSpecification();
+    
+    // First, enroll all "full" merges (size
+    // mergeFactor) to potentially be run concurrently:
+    while (last - maxNumSegments + 1 >= mergeFactor) {
+      spec.add(new OneMerge(infos.range(last-mergeFactor, last), useCompoundFile));
+      last -= mergeFactor;
+    }
+
+    // Only if there are no full merges pending do we
+    // add a final partial (< mergeFactor segments) merge:
+    if (0 == spec.merges.size()) {
+      if (maxNumSegments == 1) {
+
+        // Since we must optimize down to 1 segment, the
+        // choice is simple:
+        if (last > 1 || !isOptimized(infos.info(0))) {
+          spec.add(new OneMerge(infos.range(0, last), useCompoundFile));
+        }
+      } else if (last > maxNumSegments) {
+
+        // Take care to pick a partial merge that is
+        // least cost, but does not make the index too
+        // lopsided.  If we always just picked the
+        // partial tail then we could produce a highly
+        // lopsided index over time:
+
+        // We must merge this many segments to leave
+        // maxNumSegments in the index (from when
+        // optimize was first kicked off):
+        final int finalMergeSize = last - maxNumSegments + 1;
+
+        // Consider all possible starting points:
+        long bestSize = 0;
+        int bestStart = 0;
+
+        for(int i=0;i<last-finalMergeSize+1;i++) {
+          long sumSize = 0;
+          for(int j=0;j<finalMergeSize;j++)
+            sumSize += size(infos.info(j+i));
+          if (i == 0 || (sumSize < 2*size(infos.info(i-1)) && sumSize < bestSize)) {
+            bestStart = i;
+            bestSize = sumSize;
+          }
+        }
+
+        spec.add(new OneMerge(infos.range(bestStart, bestStart+finalMergeSize), useCompoundFile));
+      }
+    }
+    return spec.merges.size() == 0 ? null : spec;
+  }
+  
   /** Returns the merges necessary to optimize the index.
    *  This merge policy defines "optimized" to mean only one
    *  segment in the index, where that segment has no
@@ -221,81 +318,45 @@ public abstract class LogMergePolicy ext
   @Override
   public MergeSpecification findMergesForOptimize(SegmentInfos infos,
       int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
-    MergeSpecification spec;
 
     assert maxNumSegments > 0;
 
-    if (!isOptimized(infos, maxNumSegments, segmentsToOptimize)) {
-
-      // Find the newest (rightmost) segment that needs to
-      // be optimized (other segments may have been flushed
-      // since optimize started):
-      int last = infos.size();
-      while(last > 0) {
-        final SegmentInfo info = infos.info(--last);
-        if (segmentsToOptimize.contains(info)) {
-          last++;
-          break;
-        }
+    // If the segments are already optimized (e.g. there's only 1 segment), or
+    // there are fewer than maxNumSegments, all of them optimized, nothing to do.
+    if (isOptimized(infos, maxNumSegments, segmentsToOptimize)) return null;
+    
+    // Find the newest (rightmost) segment that needs to
+    // be optimized (other segments may have been flushed
+    // since optimize started):
+    int last = infos.size();
+    while (last > 0) {
+      final SegmentInfo info = infos.info(--last);
+      if (segmentsToOptimize.contains(info)) {
+        last++;
+        break;
       }
+    }
 
-      if (last > 0) {
-
-        spec = new MergeSpecification();
-
-        // First, enroll all "full" merges (size
-        // mergeFactor) to potentially be run concurrently:
-        while (last - maxNumSegments + 1 >= mergeFactor) {
-          spec.add(new OneMerge(infos.range(last-mergeFactor, last), useCompoundFile));
-          last -= mergeFactor;
-        }
-
-        // Only if there are no full merges pending do we
-        // add a final partial (< mergeFactor segments) merge:
-        if (0 == spec.merges.size()) {
-          if (maxNumSegments == 1) {
-
-            // Since we must optimize down to 1 segment, the
-            // choice is simple:
-            if (last > 1 || !isOptimized(infos.info(0)))
-              spec.add(new OneMerge(infos.range(0, last), useCompoundFile));
-          } else if (last > maxNumSegments) {
-
-            // Take care to pick a partial merge that is
-            // least cost, but does not make the index too
-            // lopsided.  If we always just picked the
-            // partial tail then we could produce a highly
-            // lopsided index over time:
-
-            // We must merge this many segments to leave
-            // maxNumSegments in the index (from when
-            // optimize was first kicked off):
-            final int finalMergeSize = last - maxNumSegments + 1;
-
-            // Consider all possible starting points:
-            long bestSize = 0;
-            int bestStart = 0;
-
-            for(int i=0;i<last-finalMergeSize+1;i++) {
-              long sumSize = 0;
-              for(int j=0;j<finalMergeSize;j++)
-                sumSize += size(infos.info(j+i));
-              if (i == 0 || (sumSize < 2*size(infos.info(i-1)) && sumSize < bestSize)) {
-                bestStart = i;
-                bestSize = sumSize;
-              }
-            }
-
-            spec.add(new OneMerge(infos.range(bestStart, bestStart+finalMergeSize), useCompoundFile));
-          }
-        }
-        
-      } else
-        spec = null;
-    } else
-      spec = null;
-
-    return spec;
+    if (last == 0) return null;
+    
+    // There is only one segment already, and it is optimized
+    if (maxNumSegments == 1 && last == 1 && isOptimized(infos.info(0))) return null;
+
+    // Check if there are any segments above the threshold
+    boolean anyTooLarge = false;
+    for (int i = 0; i < last; i++) {
+      SegmentInfo info = infos.info(i);
+      if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) {
+        anyTooLarge = true;
+        break;
+      }
+    }
+    
+    if (anyTooLarge) {
+      return findMergesForOptimizeSizeLimit(infos, maxNumSegments, last);
+    } else {
+      return findMergesForOptimizeMaxNumSegments(infos, maxNumSegments, last);
+    }
   }
 
   /**

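The window scan in findMergesForOptimizeMaxNumSegments is the subtle part of the patch: it must pick the cheapest run of adjacent segments to merge without always merging the tail, which would make the index lopsided over time. A standalone paraphrase of that loop (class name and segment sizes are hypothetical, not from this commit):

    class BestMergeWindow {
      /** Returns the start of the cheapest merge window that is not more
       *  than twice the size of the segment just before it. */
      static int bestStart(long[] sizes, int maxNumSegments) {
        int last = sizes.length;
        // We must merge this many adjacent segments to leave maxNumSegments:
        final int finalMergeSize = last - maxNumSegments + 1;
        long bestSize = 0;
        int bestStart = 0;
        for (int i = 0; i < last - finalMergeSize + 1; i++) {
          long sumSize = 0;
          for (int j = 0; j < finalMergeSize; j++)
            sumSize += sizes[j + i];
          if (i == 0 || (sumSize < 2 * sizes[i - 1] && sumSize < bestSize)) {
            bestStart = i;
            bestSize = sumSize;
          }
        }
        return bestStart;
      }

      public static void main(String[] args) {
        // Optimizing 6 segments down to 3 must merge 4 adjacent ones;
        // the scan settles on the small tail (start index 2), not the head.
        long[] sizes = { 100, 80, 10, 10, 10, 10 };
        System.out.println(bestStart(sizes, 3)); // prints 2
      }
    }
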
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/MergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/MergePolicy.java?rev=1025544&r1=1025543&r2=1025544&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/MergePolicy.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/MergePolicy.java Wed Oct 20 13:05:15 2010
@@ -77,8 +77,8 @@ public abstract class MergePolicy implem
     SegmentReader[] readers;        // used by IndexWriter
     SegmentReader[] readersClone;   // used by IndexWriter
     List<String> mergeFiles;            // used by IndexWriter
-    final SegmentInfos segments;
-    final boolean useCompoundFile;
+    public final SegmentInfos segments;
+    public final boolean useCompoundFile;
     boolean aborted;
     Throwable error;
     boolean paused;
@@ -146,7 +146,7 @@ public abstract class MergePolicy implem
       return paused;
     }
 
-    String segString(Directory dir) {
+    public String segString(Directory dir) {
       StringBuilder b = new StringBuilder();
       final int numSegments = segments.size();
       for(int i=0;i<numSegments;i++) {
@@ -162,6 +162,30 @@ public abstract class MergePolicy implem
       }
       return b.toString();
     }
+    
+    /**
+     * Returns the total size in bytes of this merge. Note that this is not the
+     * size of the merged segment, but the total size of the input segments.
+     */
+    public long totalBytesSize() throws IOException {
+      long total = 0;
+      for (SegmentInfo info : segments) {
+        total += info.sizeInBytes();
+      }
+      return total;
+    }
+
+    /**
+     * Returns the total number of documents included in this merge. Note that
+     * this is not the number of documents remaining after the merge completes.
+     */
+    public int totalNumDocs() throws IOException {
+      int total = 0;
+      for (SegmentInfo info : segments) {
+        total += info.docCount;
+      }
+      return total;
+    }
   }
 
   /**
@@ -176,7 +200,7 @@ public abstract class MergePolicy implem
      * The subset of segments to be included in the primitive merge.
      */
 
-    public List<OneMerge> merges = new ArrayList<OneMerge>();
+    public final List<OneMerge> merges = new ArrayList<OneMerge>();
 
     public void add(OneMerge merge) {
       merges.add(merge);

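With OneMerge.segments, useCompoundFile and segString() now public, and the new totalBytesSize()/totalNumDocs() accessors, code outside org.apache.lucene.index can inspect pending merges. A minimal diagnostic sketch under that assumption (the class and method names are hypothetical):

    import java.io.IOException;

    import org.apache.lucene.index.MergePolicy;
    import org.apache.lucene.store.Directory;

    class MergeLogger {
      /** Prints the input size of each pending merge in the given spec. */
      static void logMerges(MergePolicy.MergeSpecification spec, Directory dir)
          throws IOException {
        if (spec == null) return; // the policy found nothing to merge
        for (MergePolicy.OneMerge merge : spec.merges) {
          // totalBytesSize()/totalNumDocs() describe the merge's input,
          // not the segment it will produce.
          System.out.println("merge " + merge.segString(dir) + ": "
              + merge.totalBytesSize() + " input bytes, "
              + merge.totalNumDocs() + " input docs");
        }
      }
    }
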
Added: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java?rev=1025544&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java (added)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java Wed Oct 20 13:05:15 2010
@@ -0,0 +1,369 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestSizeBoundedOptimize extends LuceneTestCase {
+
+  private void addDocs(IndexWriter writer, int numDocs) throws IOException {
+    for (int i = 0; i < numDocs; i++) {
+      Document doc = new Document();
+      writer.addDocument(doc);
+    }
+    writer.commit();
+  }
+  
+  public void testByteSizeLimit() throws Exception {
+    // tests that the max merge size constraint is applied during optimize.
+    Directory dir = new RAMDirectory();
+
+    // Prepare an index w/ several small segments and a large one.
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    // prevent any merges from happening.
+    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
+    IndexWriter writer = new IndexWriter(dir, conf);
+    final int numSegments = 15;
+    for (int i = 0; i < numSegments; i++) {
+      int numDocs = i == 7 ? 10 : 1;
+      addDocs(writer, numDocs);
+    }
+    
+    writer.close();
+
+    conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
+    lmp.setMaxMergeMB(200.0 / (1 << 20)); // ~200 bytes
+    conf.setMergePolicy(lmp);
+    
+    writer = new IndexWriter(dir, conf);
+    writer.optimize();
+    writer.close();
+
+    // Only 3 segments should remain: the oversized one splits the rest into two merged groups.
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    assertEquals(3, sis.size());
+  }
+
+  public void testNumDocsLimit() throws Exception {
+    // tests that the max merge docs constraint is applied during optimize.
+    Directory dir = new RAMDirectory();
+
+    // Prepare an index w/ several small segments and a large one.
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    // prevent any merges from happening.
+    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
+    IndexWriter writer = new IndexWriter(dir, conf);
+
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 5);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    
+    writer.close();
+
+    conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    LogMergePolicy lmp = new LogDocMergePolicy();
+    lmp.setMaxMergeDocs(3);
+    conf.setMergePolicy(lmp);
+    
+    writer = new IndexWriter(dir, conf);
+    writer.optimize();
+    writer.close();
+
+    // Should only be 3 segments in the index, because one of them exceeds the max merge docs limit
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    assertEquals(3, sis.size());
+  }
+
+  public void testLastSegmentTooLarge() throws Exception {
+    Directory dir = new RAMDirectory();
+
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
+    IndexWriter writer = new IndexWriter(dir, conf);
+
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 5);
+    
+    writer.close();
+
+    conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    LogMergePolicy lmp = new LogDocMergePolicy();
+    lmp.setMaxMergeDocs(3);
+    conf.setMergePolicy(lmp);
+    
+    writer = new IndexWriter(dir, conf);
+    writer.optimize();
+    writer.close();
+
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    assertEquals(2, sis.size());
+  }
+  
+  public void testFirstSegmentTooLarge() throws Exception {
+    Directory dir = new RAMDirectory();
+    
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
+    IndexWriter writer = new IndexWriter(dir, conf);
+    
+    addDocs(writer, 5);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    
+    writer.close();
+    
+    conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    LogMergePolicy lmp = new LogDocMergePolicy();
+    lmp.setMaxMergeDocs(3);
+    conf.setMergePolicy(lmp);
+    
+    writer = new IndexWriter(dir, conf);
+    writer.optimize();
+    writer.close();
+    
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    assertEquals(2, sis.size());
+  }
+  
+  public void testAllSegmentsSmall() throws Exception {
+    Directory dir = new RAMDirectory();
+    
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
+    IndexWriter writer = new IndexWriter(dir, conf);
+    
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    
+    writer.close();
+    
+    conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    LogMergePolicy lmp = new LogDocMergePolicy();
+    lmp.setMaxMergeDocs(3);
+    conf.setMergePolicy(lmp);
+    
+    writer = new IndexWriter(dir, conf);
+    writer.optimize();
+    writer.close();
+    
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    assertEquals(1, sis.size());
+  }
+  
+  public void testAllSegmentsLarge() throws Exception {
+    Directory dir = new RAMDirectory();
+    
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
+    IndexWriter writer = new IndexWriter(dir, conf);
+    
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    
+    writer.close();
+    
+    conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    LogMergePolicy lmp = new LogDocMergePolicy();
+    lmp.setMaxMergeDocs(2);
+    conf.setMergePolicy(lmp);
+    
+    writer = new IndexWriter(dir, conf);
+    writer.optimize();
+    writer.close();
+    
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    assertEquals(3, sis.size());
+  }
+  
+  public void testOneLargeOneSmall() throws Exception {
+    Directory dir = new RAMDirectory();
+    
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
+    IndexWriter writer = new IndexWriter(dir, conf);
+    
+    addDocs(writer, 3);
+    addDocs(writer, 5);
+    addDocs(writer, 3);
+    addDocs(writer, 5);
+    
+    writer.close();
+    
+    conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    LogMergePolicy lmp = new LogDocMergePolicy();
+    lmp.setMaxMergeDocs(3);
+    conf.setMergePolicy(lmp);
+    
+    writer = new IndexWriter(dir, conf);
+    writer.optimize();
+    writer.close();
+    
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    assertEquals(4, sis.size());
+  }
+  
+  public void testMergeFactor() throws Exception {
+    Directory dir = new RAMDirectory();
+    
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
+    IndexWriter writer = new IndexWriter(dir, conf);
+    
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    addDocs(writer, 5);
+    addDocs(writer, 3);
+    addDocs(writer, 3);
+    
+    writer.close();
+    
+    conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    LogMergePolicy lmp = new LogDocMergePolicy();
+    lmp.setMaxMergeDocs(3);
+    lmp.setMergeFactor(2);
+    conf.setMergePolicy(lmp);
+    
+    writer = new IndexWriter(dir, conf);
+    writer.optimize();
+    writer.close();
+    
+    // Should only be 4 segments in the index, because of the merge factor and
+    // max merge docs settings.
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    assertEquals(4, sis.size());
+  }
+  
+  public void testSingleNonOptimizedSegment() throws Exception {
+    Directory dir = new RAMDirectory();
+    
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
+    IndexWriter writer = new IndexWriter(dir, conf);
+    
+    addDocs(writer, 3);
+    addDocs(writer, 5);
+    addDocs(writer, 3);
+    
+    writer.close();
+  
+    // delete the last document, so that the last segment is not optimized (it has a deletion).
+    IndexReader r = IndexReader.open(dir, false);
+    r.deleteDocument(r.numDocs() - 1);
+    r.close();
+    
+    conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    LogMergePolicy lmp = new LogDocMergePolicy();
+    lmp.setMaxMergeDocs(3);
+    conf.setMergePolicy(lmp);
+    
+    writer = new IndexWriter(dir, conf);
+    writer.optimize();
+    writer.close();
+    
+    // Verify that the last segment does not have deletions.
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    assertEquals(3, sis.size());
+    assertFalse(sis.info(2).hasDeletions());
+  }
+  
+  public void testSingleOptimizedSegment() throws Exception {
+    Directory dir = new RAMDirectory();
+    
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
+    IndexWriter writer = new IndexWriter(dir, conf);
+    
+    addDocs(writer, 3);
+    
+    writer.close();
+    
+    conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    LogMergePolicy lmp = new LogDocMergePolicy();
+    lmp.setMaxMergeDocs(3);
+    conf.setMergePolicy(lmp);
+    
+    writer = new IndexWriter(dir, conf);
+    writer.optimize();
+    writer.close();
+    
+    // Verify that the index still has a single, already optimized segment.
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    assertEquals(1, sis.size());
+  }
+
+  public void testSingleNonOptimizedTooLargeSegment() throws Exception {
+    Directory dir = new RAMDirectory();
+    
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
+    IndexWriter writer = new IndexWriter(dir, conf);
+    
+    addDocs(writer, 3);
+    
+    writer.close();
+  
+    // delete the last document
+    IndexReader r = IndexReader.open(dir, false);
+    r.deleteDocument(r.numDocs() - 1);
+    r.close();
+    
+    conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
+    LogMergePolicy lmp = new LogDocMergePolicy();
+    lmp.setMaxMergeDocs(2);
+    conf.setMergePolicy(lmp);
+    
+    writer = new IndexWriter(dir, conf);
+    writer.optimize();
+    writer.close();
+    
+    // Verify that the too-large segment was not rewritten and still has deletions.
+    SegmentInfos sis = new SegmentInfos();
+    sis.read(dir);
+    assertEquals(1, sis.size());
+    assertTrue(sis.info(0).hasDeletions());
+  }
+
+}

Propchange: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL