You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2012/08/24 01:15:12 UTC
svn commit: r1376766 - in /lucene/dev/trunk/lucene: ./
core/src/java/org/apache/lucene/index/ core/src/test/org/apache/lucene/index/
test-framework/src/java/org/apache/lucene/util/
Author: uschindler
Date: Thu Aug 23 23:15:11 2012
New Revision: 1376766
URL: http://svn.apache.org/viewvc?rev=1376766&view=rev
Log:
LUCENE-4323: Added support for an absolute maximum CFS segment size (in MiB) to LogMergePolicy and TieredMergePolicy
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java
lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1376766&r1=1376765&r2=1376766&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Aug 23 23:15:11 2012
@@ -20,6 +20,10 @@ New Features
search performance. This was from Han Jiang's 2012 Google Summer of
Code project (Han Jiang, Adrien Grand, Robert Muir, Mike McCandless)
+* LUCENE-4323: Added support for an absolute maximum CFS segment size
+ (in MiB) to LogMergePolicy and TieredMergePolicy.
+ (Alexey Lef via Uwe Schindler)
+
API Changes
* LUCENE-4299: Added Terms.hasPositions() and Terms.hasOffsets().
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1376766&r1=1376765&r2=1376766&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java Thu Aug 23 23:15:11 2012
@@ -64,6 +64,13 @@ public abstract class LogMergePolicy ext
* @see #setNoCFSRatio */
public static final double DEFAULT_NO_CFS_RATIO = 0.1;
+ /** Default maxCFSSegmentSize value allows compound file
+ * for a segment of any size. The actual file format is
+ * still subject to noCFSRatio.
+ * @see #setMaxCFSSegmentSizeMB(double)
+ */
+ public static final long DEFAULT_MAX_CFS_SEGMENT_SIZE = Long.MAX_VALUE;
+
protected int mergeFactor = DEFAULT_MERGE_FACTOR;
protected long minMergeSize;
@@ -74,6 +81,7 @@ public abstract class LogMergePolicy ext
protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
+ protected long maxCFSSegmentSize = DEFAULT_MAX_CFS_SEGMENT_SIZE;
protected boolean calibrateSizeByDeletes = true;
@@ -136,21 +144,21 @@ public abstract class LogMergePolicy ext
// Javadoc inherited
@Override
public boolean useCompoundFile(SegmentInfos infos, SegmentInfoPerCommit mergedInfo) throws IOException {
- final boolean doCFS;
-
- if (!useCompoundFile) {
- doCFS = false;
- } else if (noCFSRatio == 1.0) {
- doCFS = true;
- } else {
- long totalSize = 0;
- for (SegmentInfoPerCommit info : infos) {
- totalSize += size(info);
- }
-
- doCFS = size(mergedInfo) <= noCFSRatio * totalSize;
+ if (!getUseCompoundFile()) {
+ return false;
+ }
+ long mergedInfoSize = size(mergedInfo);
+ if (mergedInfoSize > maxCFSSegmentSize) {
+ return false;
+ }
+ if (getNoCFSRatio() >= 1.0) {
+ return true;
+ }
+ long totalSize = 0;
+ for (SegmentInfoPerCommit info : infos) {
+ totalSize += size(info);
}
- return doCFS;
+ return mergedInfoSize <= getNoCFSRatio() * totalSize;
}
/** Sets whether compound file format should be used for
@@ -674,9 +682,28 @@ public abstract class LogMergePolicy ext
sb.append("calibrateSizeByDeletes=").append(calibrateSizeByDeletes).append(", ");
sb.append("maxMergeDocs=").append(maxMergeDocs).append(", ");
sb.append("useCompoundFile=").append(useCompoundFile).append(", ");
+ sb.append("maxCFSSegmentSizeMB=").append(getMaxCFSSegmentSizeMB()).append(", ");
sb.append("noCFSRatio=").append(noCFSRatio);
sb.append("]");
return sb.toString();
}
-
+
+ /** Returns the largest size (in MB) allowed for a compound file segment. */
+ public final double getMaxCFSSegmentSizeMB() {
+ return maxCFSSegmentSize/1024/1024.;
+ }
+
+ /** If a merged segment will be larger than this value (in MB),
+ * leave the segment as
+ * non-compound file even if compound file is enabled.
+ * Set this to Double.POSITIVE_INFINITY (default) and noCFSRatio to 1.0
+ * to always use CFS regardless of merge size. */
+ public final void setMaxCFSSegmentSizeMB(double v) {
+ if (v < 0.0) {
+ throw new IllegalArgumentException("maxCFSSegmentSizeMB must be >=0 (got " + v + ")");
+ }
+ v *= 1024 * 1024;
+ this.maxCFSSegmentSize = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
+ }
+
}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java?rev=1376766&r1=1376765&r2=1376766&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java Thu Aug 23 23:15:11 2012
@@ -84,6 +84,7 @@ public class TieredMergePolicy extends M
private double forceMergeDeletesPctAllowed = 10.0;
private boolean useCompoundFile = true;
private double noCFSRatio = 0.1;
+ private long maxCFSSegmentSize = Long.MAX_VALUE;
private double reclaimDeletesWeight = 2.0;
/** Maximum number of segments to be merged at a time
@@ -127,7 +128,11 @@ public class TieredMergePolicy extends M
* sizes of to-be-merged segments (compensating for
* percent deleted docs). Default is 5 GB. */
public TieredMergePolicy setMaxMergedSegmentMB(double v) {
- maxMergedSegmentBytes = (long) (v*1024*1024);
+ if (v < 0.0) {
+ throw new IllegalArgumentException("maxMergedSegmentMB must be >=0 (got " + v + ")");
+ }
+ v *= 1024 * 1024;
+ maxMergedSegmentBytes = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
return this;
}
@@ -162,7 +167,8 @@ public class TieredMergePolicy extends M
if (v <= 0.0) {
throw new IllegalArgumentException("floorSegmentMB must be >= 0.0 (got " + v + ")");
}
- floorSegmentBytes = (long) (v*1024*1024);
+ v *= 1024 * 1024;
+ floorSegmentBytes = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
return this;
}
@@ -602,21 +608,21 @@ public class TieredMergePolicy extends M
@Override
public boolean useCompoundFile(SegmentInfos infos, SegmentInfoPerCommit mergedInfo) throws IOException {
- final boolean doCFS;
-
- if (!useCompoundFile) {
- doCFS = false;
- } else if (noCFSRatio == 1.0) {
- doCFS = true;
- } else {
- long totalSize = 0;
- for (SegmentInfoPerCommit info : infos) {
+ if (!getUseCompoundFile()) {
+ return false;
+ }
+ long mergedInfoSize = size(mergedInfo);
+ if (mergedInfoSize > maxCFSSegmentSize) {
+ return false;
+ }
+ if (getNoCFSRatio() >= 1.0) {
+ return true;
+ }
+ long totalSize = 0;
+ for (SegmentInfoPerCommit info : infos) {
totalSize += size(info);
- }
-
- doCFS = size(mergedInfo) <= noCFSRatio * totalSize;
}
- return doCFS;
+ return mergedInfoSize <= getNoCFSRatio() * totalSize;
}
@Override
@@ -629,7 +635,7 @@ public class TieredMergePolicy extends M
boolean hasDeletions = w.numDeletedDocs(info) > 0;
return !hasDeletions &&
info.info.dir == w.getDirectory() &&
- (info.info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0);
+ (info.info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0 || maxCFSSegmentSize < Long.MAX_VALUE);
}
// Segment size in bytes, pro-rated by % deleted
@@ -664,7 +670,27 @@ public class TieredMergePolicy extends M
sb.append("forceMergeDeletesPctAllowed=").append(forceMergeDeletesPctAllowed).append(", ");
sb.append("segmentsPerTier=").append(segsPerTier).append(", ");
sb.append("useCompoundFile=").append(useCompoundFile).append(", ");
+ sb.append("maxCFSSegmentSizeMB=").append(getMaxCFSSegmentSizeMB()).append(", ");
sb.append("noCFSRatio=").append(noCFSRatio);
return sb.toString();
}
+
+ /** Returns the largest size (in MB) allowed for a compound file segment. */
+ public final double getMaxCFSSegmentSizeMB() {
+ return maxCFSSegmentSize/1024/1024.;
+ }
+
+ /** If a merged segment will be larger than this value (in MB),
+ * leave the segment as
+ * non-compound file even if compound file is enabled.
+ * Set this to Double.POSITIVE_INFINITY (default) and noCFSRatio to 1.0
+ * to always use CFS regardless of merge size. */
+ public final TieredMergePolicy setMaxCFSSegmentSizeMB(double v) {
+ if (v < 0.0) {
+ throw new IllegalArgumentException("maxCFSSegmentSizeMB must be >=0 (got " + v + ")");
+ }
+ v *= 1024 * 1024;
+ this.maxCFSSegmentSize = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
+ return this;
+ }
}
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java?rev=1376766&r1=1376765&r2=1376766&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java Thu Aug 23 23:15:11 2012
@@ -263,4 +263,31 @@ public class TestIndexWriterMergePolicy
assertTrue(numSegments < mergeFactor);
}
}
+
+ private static final double EPSILON = 1E-14;
+
+ public void testSetters() {
+ assertSetters(new LogByteSizeMergePolicy());
+ assertSetters(new LogDocMergePolicy());
+ }
+
+ private void assertSetters(LogMergePolicy lmp) {
+ lmp.setMaxCFSSegmentSizeMB(2.0);
+ assertEquals(2.0, lmp.getMaxCFSSegmentSizeMB(), EPSILON);
+
+ lmp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
+ assertEquals(Long.MAX_VALUE/1024/1024., lmp.getMaxCFSSegmentSizeMB(), EPSILON*Long.MAX_VALUE);
+
+ lmp.setMaxCFSSegmentSizeMB(Long.MAX_VALUE/1024/1024.);
+ assertEquals(Long.MAX_VALUE/1024/1024., lmp.getMaxCFSSegmentSizeMB(), EPSILON*Long.MAX_VALUE);
+
+ try {
+ lmp.setMaxCFSSegmentSizeMB(-2.0);
+ fail("Didn't throw IllegalArgumentException");
+ } catch (IllegalArgumentException iae) {
+ // pass
+ }
+
+ // TODO: Add more checks for other non-double setters!
+ }
}
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java?rev=1376766&r1=1376765&r2=1376766&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java Thu Aug 23 23:15:11 2012
@@ -153,4 +153,60 @@ public class TestTieredMergePolicy exten
dir.close();
}
+
+ private static final double EPSILON = 1E-14;
+
+ public void testSetters() {
+ final TieredMergePolicy tmp = new TieredMergePolicy();
+
+ tmp.setMaxMergedSegmentMB(0.5);
+ assertEquals(0.5, tmp.getMaxMergedSegmentMB(), EPSILON);
+
+ tmp.setMaxMergedSegmentMB(Double.POSITIVE_INFINITY);
+ assertEquals(Long.MAX_VALUE/1024/1024., tmp.getMaxMergedSegmentMB(), EPSILON*Long.MAX_VALUE);
+
+ tmp.setMaxMergedSegmentMB(Long.MAX_VALUE/1024/1024.);
+ assertEquals(Long.MAX_VALUE/1024/1024., tmp.getMaxMergedSegmentMB(), EPSILON*Long.MAX_VALUE);
+
+ try {
+ tmp.setMaxMergedSegmentMB(-2.0);
+ fail("Didn't throw IllegalArgumentException");
+ } catch (IllegalArgumentException iae) {
+ // pass
+ }
+
+ tmp.setFloorSegmentMB(2.0);
+ assertEquals(2.0, tmp.getFloorSegmentMB(), EPSILON);
+
+ tmp.setFloorSegmentMB(Double.POSITIVE_INFINITY);
+ assertEquals(Long.MAX_VALUE/1024/1024., tmp.getFloorSegmentMB(), EPSILON*Long.MAX_VALUE);
+
+ tmp.setFloorSegmentMB(Long.MAX_VALUE/1024/1024.);
+ assertEquals(Long.MAX_VALUE/1024/1024., tmp.getFloorSegmentMB(), EPSILON*Long.MAX_VALUE);
+
+ try {
+ tmp.setFloorSegmentMB(-2.0);
+ fail("Didn't throw IllegalArgumentException");
+ } catch (IllegalArgumentException iae) {
+ // pass
+ }
+
+ tmp.setMaxCFSSegmentSizeMB(2.0);
+ assertEquals(2.0, tmp.getMaxCFSSegmentSizeMB(), EPSILON);
+
+ tmp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
+ assertEquals(Long.MAX_VALUE/1024/1024., tmp.getMaxCFSSegmentSizeMB(), EPSILON*Long.MAX_VALUE);
+
+ tmp.setMaxCFSSegmentSizeMB(Long.MAX_VALUE/1024/1024.);
+ assertEquals(Long.MAX_VALUE/1024/1024., tmp.getMaxCFSSegmentSizeMB(), EPSILON*Long.MAX_VALUE);
+
+ try {
+ tmp.setMaxCFSSegmentSizeMB(-2.0);
+ fail("Didn't throw IllegalArgumentException");
+ } catch (IllegalArgumentException iae) {
+ // pass
+ }
+
+ // TODO: Add more checks for other non-double setters!
+ }
}
Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java?rev=1376766&r1=1376765&r2=1376766&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java Thu Aug 23 23:15:11 2012
@@ -765,6 +765,11 @@ public abstract class LuceneTestCase ext
} else {
logmp.setMergeFactor(_TestUtil.nextInt(r, 10, 50));
}
+ logmp.setUseCompoundFile(r.nextBoolean());
+ logmp.setNoCFSRatio(0.1 + r.nextDouble()*0.8);
+ if (rarely()) {
+ logmp.setMaxCFSSegmentSizeMB(0.2 + r.nextDouble() * 2.0);
+ }
return logmp;
}
@@ -791,6 +796,9 @@ public abstract class LuceneTestCase ext
}
tmp.setUseCompoundFile(r.nextBoolean());
tmp.setNoCFSRatio(0.1 + r.nextDouble()*0.8);
+ if (rarely()) {
+ tmp.setMaxCFSSegmentSizeMB(0.2 + r.nextDouble() * 2.0);
+ }
tmp.setReclaimDeletesWeight(r.nextDouble()*4);
return tmp;
}