You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2011/01/17 05:47:03 UTC

svn commit: r1059751 - in /lucene/dev/trunk/lucene: ./ src/java/org/apache/lucene/index/ src/test/org/apache/lucene/index/

Author: shaie
Date: Mon Jan 17 04:47:03 2011
New Revision: 1059751

URL: http://svn.apache.org/viewvc?rev=1059751&view=rev
Log:
LUCENE-2701: port to trunk

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogDocMergePolicy.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1059751&r1=1059750&r2=1059751&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Mon Jan 17 04:47:03 2011
@@ -507,9 +507,10 @@ Changes in runtime behavior
   usage, allowing applications to accidentally open two writers on the
   same directory.  (Mike McCandless)
 
-* LUCENE-2701: maxMergeMB and maxMergeDocs constraints set on LogMergePolicy now
-  affect optimize() as well (as opposed to only regular merges). This means that
-  you can run optimize() and too large segments won't be merged. (Shai Erera)
+* LUCENE-2701: maxMergeMBForOptimize and maxMergeDocs constraints set on 
+  LogMergePolicy now affect optimize() as well (as opposed to only regular 
+  merges). This means that you can run optimize() and too large segments won't 
+  be merged. (Shai Erera)
 
 API Changes
 

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java?rev=1059751&r1=1059750&r2=1059751&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java Mon Jan 17 04:47:03 2011
@@ -30,9 +30,14 @@ public class LogByteSizeMergePolicy exte
    *  or larger will never be merged.  @see setMaxMergeMB */
   public static final double DEFAULT_MAX_MERGE_MB = 2048;
 
+  /** Default maximum segment size.  A segment of this size
+   *  or larger will never be merged during optimize.  @see setMaxMergeMBForOptimize */
+  public static final double DEFAULT_MAX_MERGE_MB_FOR_OPTIMIZE = Long.MAX_VALUE;
+
   public LogByteSizeMergePolicy() {
     minMergeSize = (long) (DEFAULT_MIN_MERGE_MB*1024*1024);
     maxMergeSize = (long) (DEFAULT_MAX_MERGE_MB*1024*1024);
+    maxMergeSizeForOptimize = (long) (DEFAULT_MAX_MERGE_MB_FOR_OPTIMIZE*1024*1024);
   }
   
   @Override
@@ -63,6 +68,23 @@ public class LogByteSizeMergePolicy exte
     return ((double) maxMergeSize)/1024/1024;
   }
 
+  /** <p>Determines the largest segment (measured by total
+   *  byte size of the segment's files, in MB) that may be
+   *  merged with other segments during optimize. Setting
+   *  it low will leave the index with more than 1 segment,
+   *  even if {@link IndexWriter#optimize()} is called.*/
+  public void setMaxMergeMBForOptimize(double mb) {
+    maxMergeSizeForOptimize = (long) (mb*1024*1024);
+  }
+
+  /** Returns the largest segment (measured by total byte
+   *  size of the segment's files, in MB) that may be merged
+   *  with other segments during optimize.
+   *  @see #setMaxMergeMBForOptimize */
+  public double getMaxMergeMBForOptimize() {
+    return ((double) maxMergeSizeForOptimize)/1024/1024;
+  }
+
   /** Sets the minimum size for the lowest level segments.
    * Any segments below this size are considered to be on
    * the same level (even if they vary drastically in size)

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogDocMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogDocMergePolicy.java?rev=1059751&r1=1059750&r2=1059751&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogDocMergePolicy.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogDocMergePolicy.java Mon Jan 17 04:47:03 2011
@@ -31,9 +31,10 @@ public class LogDocMergePolicy extends L
   public LogDocMergePolicy() {
     minMergeSize = DEFAULT_MIN_MERGE_DOCS;
     
-    // maxMergeSize is never used by LogDocMergePolicy; set
+    // maxMergeSize(ForOptimize) are never used by LogDocMergePolicy; set
     // it to Long.MAX_VALUE to disable it
     maxMergeSize = Long.MAX_VALUE;
+    maxMergeSizeForOptimize = Long.MAX_VALUE;
   }
 
   @Override

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1059751&r1=1059750&r2=1059751&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Mon Jan 17 04:47:03 2011
@@ -63,6 +63,9 @@ public abstract class LogMergePolicy ext
 
   protected long minMergeSize;
   protected long maxMergeSize;
+  // Although the core MPs set it explicitly, we must default in case someone
+  // out there wrote his own LMP ...
+  protected long maxMergeSizeForOptimize = Long.MAX_VALUE;
   protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
 
   protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
@@ -240,9 +243,9 @@ public abstract class LogMergePolicy ext
     int start = last - 1;
     while (start >= 0) {
       SegmentInfo info = infos.info(start);
-      if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) {
+      if (size(info) > maxMergeSizeForOptimize || sizeDocs(info) > maxMergeDocs) {
         if (verbose()) {
-          message("optimize: skip segment=" + info + ": size is > maxMergeSize (" + maxMergeSize + ") or sizeDocs is > maxMergeDocs (" + maxMergeDocs + ")");
+          message("optimize: skip segment=" + info + ": size is > maxMergeSize (" + maxMergeSizeForOptimize + ") or sizeDocs is > maxMergeDocs (" + maxMergeDocs + ")");
         }
         // need to skip that segment + add a merge for the 'right' segments,
         // unless there is only 1 which is optimized.
@@ -326,9 +329,12 @@ public abstract class LogMergePolicy ext
   }
   
   /** Returns the merges necessary to optimize the index.
-   *  This merge policy defines "optimized" to mean only one
-   *  segment in the index, where that segment has no
-   *  deletions pending nor separate norms, and it is in
+   *  This merge policy defines "optimized" to mean only the
+   *  requested number of segments is left in the index, and
+   *  respects the {@link #maxMergeSizeForOptimize} setting.
+   *  By default, and assuming {@code maxNumSegments=1}, only
+   *  one segment will be left in the index, where that segment
+   *  has no deletions pending nor separate norms, and it is in
    *  compound file format if the current useCompoundFile
    *  setting is true.  This method returns multiple merges
    *  (mergeFactor at a time) so the {@link MergeScheduler}
@@ -382,7 +388,7 @@ public abstract class LogMergePolicy ext
     boolean anyTooLarge = false;
     for (int i = 0; i < last; i++) {
       SegmentInfo info = infos.info(i);
-      if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) {
+      if (size(info) > maxMergeSizeForOptimize || sizeDocs(info) > maxMergeDocs) {
         anyTooLarge = true;
         break;
       }
@@ -588,6 +594,7 @@ public abstract class LogMergePolicy ext
     sb.append("minMergeSize=").append(minMergeSize).append(", ");
     sb.append("mergeFactor=").append(mergeFactor).append(", ");
     sb.append("maxMergeSize=").append(maxMergeSize).append(", ");
+    sb.append("maxMergeSizeForOptimize=").append(maxMergeSizeForOptimize).append(", ");
     sb.append("calibrateSizeByDeletes=").append(calibrateSizeByDeletes).append(", ");
     sb.append("maxMergeDocs=").append(maxMergeDocs).append(", ");
     sb.append("useCompoundFile=").append(useCompoundFile);

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java?rev=1059751&r1=1059750&r2=1059751&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java Mon Jan 17 04:47:03 2011
@@ -63,7 +63,7 @@ public class TestSizeBoundedOptimize ext
 
     conf = newWriterConfig();
     LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
-    lmp.setMaxMergeMB((min + 1) / (1 << 20));
+    lmp.setMaxMergeMBForOptimize((min + 1) / (1 << 20));
     conf.setMergePolicy(lmp);
     
     writer = new IndexWriter(dir, conf);