You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2012/08/24 01:15:12 UTC

svn commit: r1376766 - in /lucene/dev/trunk/lucene: ./ core/src/java/org/apache/lucene/index/ core/src/test/org/apache/lucene/index/ test-framework/src/java/org/apache/lucene/util/

Author: uschindler
Date: Thu Aug 23 23:15:11 2012
New Revision: 1376766

URL: http://svn.apache.org/viewvc?rev=1376766&view=rev
Log:
LUCENE-4323: Added support for an absolute maximum CFS segment size (in MiB) to LogMergePolicy and TieredMergePolicy

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java
    lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1376766&r1=1376765&r2=1376766&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Aug 23 23:15:11 2012
@@ -20,6 +20,10 @@ New Features
   search performance.  This was from Han Jiang's 2012 Google Summer of
   Code project (Han Jiang, Adrien Grand, Robert Muir, Mike McCandless)
 
+* LUCENE-4323: Added support for an absolute maximum CFS segment size
+  (in MiB) to LogMergePolicy and TieredMergePolicy.
+  (Alexey Lef via Uwe Schindler)
+
 API Changes
 
 * LUCENE-4299: Added Terms.hasPositions() and Terms.hasOffsets().

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1376766&r1=1376765&r2=1376766&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java Thu Aug 23 23:15:11 2012
@@ -64,6 +64,13 @@ public abstract class LogMergePolicy ext
    *  @see #setNoCFSRatio */
   public static final double DEFAULT_NO_CFS_RATIO = 0.1;
 
+  /** Default maxCFSSegmentSize value allows a compound file
+   * for a segment of any size. The actual file format is
+   * still subject to noCFSRatio.
+   * @see #setMaxCFSSegmentSizeMB(double)
+   */
+  public static final long DEFAULT_MAX_CFS_SEGMENT_SIZE = Long.MAX_VALUE;
+
   protected int mergeFactor = DEFAULT_MERGE_FACTOR;
 
   protected long minMergeSize;
@@ -74,6 +81,7 @@ public abstract class LogMergePolicy ext
   protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
 
   protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
+  protected long maxCFSSegmentSize = DEFAULT_MAX_CFS_SEGMENT_SIZE;
 
   protected boolean calibrateSizeByDeletes = true;
   
@@ -136,21 +144,21 @@ public abstract class LogMergePolicy ext
   // Javadoc inherited
   @Override
   public boolean useCompoundFile(SegmentInfos infos, SegmentInfoPerCommit mergedInfo) throws IOException {
-    final boolean doCFS;
-
-    if (!useCompoundFile) {
-      doCFS = false;
-    } else if (noCFSRatio == 1.0) {
-      doCFS = true;
-    } else {
-      long totalSize = 0;
-      for (SegmentInfoPerCommit info : infos) {
-        totalSize += size(info);
-      }
-
-      doCFS = size(mergedInfo) <= noCFSRatio * totalSize;
+    if (!getUseCompoundFile()) {
+      return false;
+    }
+    long mergedInfoSize = size(mergedInfo);
+    if (mergedInfoSize > maxCFSSegmentSize) {
+      return false;
+    }
+    if (getNoCFSRatio() >= 1.0) {
+      return true;
+    }
+    long totalSize = 0;
+    for (SegmentInfoPerCommit info : infos) {
+      totalSize += size(info);
     }
-    return doCFS;
+    return mergedInfoSize <= getNoCFSRatio() * totalSize;
   }
 
   /** Sets whether compound file format should be used for
@@ -674,9 +682,28 @@ public abstract class LogMergePolicy ext
     sb.append("calibrateSizeByDeletes=").append(calibrateSizeByDeletes).append(", ");
     sb.append("maxMergeDocs=").append(maxMergeDocs).append(", ");
     sb.append("useCompoundFile=").append(useCompoundFile).append(", ");
+    sb.append("maxCFSSegmentSizeMB=").append(getMaxCFSSegmentSizeMB()).append(", ");
     sb.append("noCFSRatio=").append(noCFSRatio);
     sb.append("]");
     return sb.toString();
   }
-  
+
+  /** Returns the largest size (in MB) allowed for a compound file segment. */
+  public final double getMaxCFSSegmentSizeMB() {
+    return maxCFSSegmentSize/1024/1024.;
+  }
+
+  /** If a merged segment will be larger than this size (in MB),
+   *  leave the segment as a
+   *  non-compound file even if compound file is enabled.
+   *  Set this to Double.POSITIVE_INFINITY (default) and noCFSRatio to 1.0
+   *  to always use CFS regardless of merge size. */
+  public final void setMaxCFSSegmentSizeMB(double v) {
+    if (v < 0.0) {
+      throw new IllegalArgumentException("maxCFSSegmentSizeMB must be >=0 (got " + v + ")");
+    }
+    v *= 1024 * 1024;
+    this.maxCFSSegmentSize = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
+  }
+
 }

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java?rev=1376766&r1=1376765&r2=1376766&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java Thu Aug 23 23:15:11 2012
@@ -84,6 +84,7 @@ public class TieredMergePolicy extends M
   private double forceMergeDeletesPctAllowed = 10.0;
   private boolean useCompoundFile = true;
   private double noCFSRatio = 0.1;
+  private long maxCFSSegmentSize = Long.MAX_VALUE;
   private double reclaimDeletesWeight = 2.0;
 
   /** Maximum number of segments to be merged at a time
@@ -127,7 +128,11 @@ public class TieredMergePolicy extends M
    *  sizes of to-be-merged segments (compensating for
    *  percent deleted docs).  Default is 5 GB. */
   public TieredMergePolicy setMaxMergedSegmentMB(double v) {
-    maxMergedSegmentBytes = (long) (v*1024*1024);
+    if (v < 0.0) {
+      throw new IllegalArgumentException("maxMergedSegmentMB must be >=0 (got " + v + ")");
+    }
+    v *= 1024 * 1024;
+    maxMergedSegmentBytes = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
     return this;
   }
 
@@ -162,7 +167,8 @@ public class TieredMergePolicy extends M
     if (v <= 0.0) {
       throw new IllegalArgumentException("floorSegmentMB must be >= 0.0 (got " + v + ")");
     }
-    floorSegmentBytes = (long) (v*1024*1024);
+    v *= 1024 * 1024;
+    floorSegmentBytes = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
     return this;
   }
 
@@ -602,21 +608,21 @@ public class TieredMergePolicy extends M
 
   @Override
   public boolean useCompoundFile(SegmentInfos infos, SegmentInfoPerCommit mergedInfo) throws IOException {
-    final boolean doCFS;
-
-    if (!useCompoundFile) {
-      doCFS = false;
-    } else if (noCFSRatio == 1.0) {
-      doCFS = true;
-    } else {
-      long totalSize = 0;
-      for (SegmentInfoPerCommit info : infos) {
+    if (!getUseCompoundFile()) {
+        return false;
+    }
+    long mergedInfoSize = size(mergedInfo);
+    if (mergedInfoSize > maxCFSSegmentSize) {
+        return false;
+    }
+    if (getNoCFSRatio() >= 1.0) {
+        return true;
+    }
+    long totalSize = 0;
+    for (SegmentInfoPerCommit info : infos) {
         totalSize += size(info);
-      }
-
-      doCFS = size(mergedInfo) <= noCFSRatio * totalSize;
     }
-    return doCFS;
+    return mergedInfoSize <= getNoCFSRatio() * totalSize;
   }
 
   @Override
@@ -629,7 +635,7 @@ public class TieredMergePolicy extends M
     boolean hasDeletions = w.numDeletedDocs(info) > 0;
     return !hasDeletions &&
       info.info.dir == w.getDirectory() &&
-      (info.info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0);
+      (info.info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0 || maxCFSSegmentSize < Long.MAX_VALUE);
   }
 
   // Segment size in bytes, pro-rated by % deleted
@@ -664,7 +670,27 @@ public class TieredMergePolicy extends M
     sb.append("forceMergeDeletesPctAllowed=").append(forceMergeDeletesPctAllowed).append(", ");
     sb.append("segmentsPerTier=").append(segsPerTier).append(", ");
     sb.append("useCompoundFile=").append(useCompoundFile).append(", ");
+    sb.append("maxCFSSegmentSizeMB=").append(getMaxCFSSegmentSizeMB()).append(", ");
     sb.append("noCFSRatio=").append(noCFSRatio);
     return sb.toString();
   }
+
+  /** Returns the largest size (in MB) allowed for a compound file segment. */
+  public final double getMaxCFSSegmentSizeMB() {
+    return maxCFSSegmentSize/1024/1024.;
+  }
+
+  /** If a merged segment will be larger than this size (in MB),
+   *  leave the segment as a
+   *  non-compound file even if compound file is enabled.
+   *  Set this to Double.POSITIVE_INFINITY (default) and noCFSRatio to 1.0
+   *  to always use CFS regardless of merge size. */
+  public final TieredMergePolicy setMaxCFSSegmentSizeMB(double v) {
+    if (v < 0.0) {
+      throw new IllegalArgumentException("maxCFSSegmentSizeMB must be >=0 (got " + v + ")");
+    }
+    v *= 1024 * 1024;
+    this.maxCFSSegmentSize = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
+    return this;
+  }
 }

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java?rev=1376766&r1=1376765&r2=1376766&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java Thu Aug 23 23:15:11 2012
@@ -263,4 +263,31 @@ public class TestIndexWriterMergePolicy 
       assertTrue(numSegments < mergeFactor);
     }
   }
+
+  private static final double EPSILON = 1E-14;
+  
+  public void testSetters() {
+    assertSetters(new LogByteSizeMergePolicy());
+    assertSetters(new LogDocMergePolicy());
+  }
+
+  private void assertSetters(LogMergePolicy lmp) {
+    lmp.setMaxCFSSegmentSizeMB(2.0);
+    assertEquals(2.0, lmp.getMaxCFSSegmentSizeMB(), EPSILON);
+    
+    lmp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
+    assertEquals(Long.MAX_VALUE/1024/1024., lmp.getMaxCFSSegmentSizeMB(), EPSILON*Long.MAX_VALUE);
+    
+    lmp.setMaxCFSSegmentSizeMB(Long.MAX_VALUE/1024/1024.);
+    assertEquals(Long.MAX_VALUE/1024/1024., lmp.getMaxCFSSegmentSizeMB(), EPSILON*Long.MAX_VALUE);
+    
+    try {
+      lmp.setMaxCFSSegmentSizeMB(-2.0);
+      fail("Didn't throw IllegalArgumentException");
+    } catch (IllegalArgumentException iae) {
+      // pass
+    }
+    
+    // TODO: Add more checks for other non-double setters!
+  }
 }

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java?rev=1376766&r1=1376765&r2=1376766&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java Thu Aug 23 23:15:11 2012
@@ -153,4 +153,60 @@ public class TestTieredMergePolicy exten
 
     dir.close();
   }
+  
+  private static final double EPSILON = 1E-14;
+  
+  public void testSetters() {
+    final TieredMergePolicy tmp = new TieredMergePolicy();
+    
+    tmp.setMaxMergedSegmentMB(0.5);
+    assertEquals(0.5, tmp.getMaxMergedSegmentMB(), EPSILON);
+    
+    tmp.setMaxMergedSegmentMB(Double.POSITIVE_INFINITY);
+    assertEquals(Long.MAX_VALUE/1024/1024., tmp.getMaxMergedSegmentMB(), EPSILON*Long.MAX_VALUE);
+    
+    tmp.setMaxMergedSegmentMB(Long.MAX_VALUE/1024/1024.);
+    assertEquals(Long.MAX_VALUE/1024/1024., tmp.getMaxMergedSegmentMB(), EPSILON*Long.MAX_VALUE);
+    
+    try {
+      tmp.setMaxMergedSegmentMB(-2.0);
+      fail("Didn't throw IllegalArgumentException");
+    } catch (IllegalArgumentException iae) {
+      // pass
+    }
+    
+    tmp.setFloorSegmentMB(2.0);
+    assertEquals(2.0, tmp.getFloorSegmentMB(), EPSILON);
+    
+    tmp.setFloorSegmentMB(Double.POSITIVE_INFINITY);
+    assertEquals(Long.MAX_VALUE/1024/1024., tmp.getFloorSegmentMB(), EPSILON*Long.MAX_VALUE);
+    
+    tmp.setFloorSegmentMB(Long.MAX_VALUE/1024/1024.);
+    assertEquals(Long.MAX_VALUE/1024/1024., tmp.getFloorSegmentMB(), EPSILON*Long.MAX_VALUE);
+    
+    try {
+      tmp.setFloorSegmentMB(-2.0);
+      fail("Didn't throw IllegalArgumentException");
+    } catch (IllegalArgumentException iae) {
+      // pass
+    }
+    
+    tmp.setMaxCFSSegmentSizeMB(2.0);
+    assertEquals(2.0, tmp.getMaxCFSSegmentSizeMB(), EPSILON);
+    
+    tmp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
+    assertEquals(Long.MAX_VALUE/1024/1024., tmp.getMaxCFSSegmentSizeMB(), EPSILON*Long.MAX_VALUE);
+    
+    tmp.setMaxCFSSegmentSizeMB(Long.MAX_VALUE/1024/1024.);
+    assertEquals(Long.MAX_VALUE/1024/1024., tmp.getMaxCFSSegmentSizeMB(), EPSILON*Long.MAX_VALUE);
+    
+    try {
+      tmp.setMaxCFSSegmentSizeMB(-2.0);
+      fail("Didn't throw IllegalArgumentException");
+    } catch (IllegalArgumentException iae) {
+      // pass
+    }
+    
+    // TODO: Add more checks for other non-double setters!
+  }
 }

Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java?rev=1376766&r1=1376765&r2=1376766&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java Thu Aug 23 23:15:11 2012
@@ -765,6 +765,11 @@ public abstract class LuceneTestCase ext
     } else {
       logmp.setMergeFactor(_TestUtil.nextInt(r, 10, 50));
     }
+    logmp.setUseCompoundFile(r.nextBoolean());
+    logmp.setNoCFSRatio(0.1 + r.nextDouble()*0.8);
+    if (rarely()) {
+      logmp.setMaxCFSSegmentSizeMB(0.2 + r.nextDouble() * 2.0);
+    }
     return logmp;
   }
 
@@ -791,6 +796,9 @@ public abstract class LuceneTestCase ext
     }
     tmp.setUseCompoundFile(r.nextBoolean());
     tmp.setNoCFSRatio(0.1 + r.nextDouble()*0.8);
+    if (rarely()) {
+      tmp.setMaxCFSSegmentSizeMB(0.2 + r.nextDouble() * 2.0);
+    }
     tmp.setReclaimDeletesWeight(r.nextDouble()*4);
     return tmp;
   }