You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2020/02/05 15:02:06 UTC
[lucene-solr] branch branch_8x updated: LUCENE-9200: consistently
use double (not float) math for TieredMergePolicy's decisions,
to fix a corner-case bug uncovered by randomized tests
This is an automated email from the ASF dual-hosted git repository.
mikemccand pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8x by this push:
new 3e63cd3 LUCENE-9200: consistently use double (not float) math for TieredMergePolicy's decisions, to fix a corner-case bug uncovered by randomized tests
3e63cd3 is described below
commit 3e63cd38ef0e5c70c2644322935e61e46b22263f
Author: Mike McCandless <mi...@apache.org>
AuthorDate: Wed Feb 5 09:51:31 2020 -0500
LUCENE-9200: consistently use double (not float) math for TieredMergePolicy's decisions, to fix a corner-case bug uncovered by randomized tests
---
lucene/CHANGES.txt | 3 ++
.../java/org/apache/lucene/index/MergePolicy.java | 2 +-
.../apache/lucene/index/TestTieredMergePolicy.java | 33 +++++++++++++++-------
.../lucene/index/BaseMergePolicyTestCase.java | 9 +++++-
4 files changed, 35 insertions(+), 12 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 5569d68..6a28efe 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -81,6 +81,9 @@ Bug Fixes
* LUCENE-9135: Make UniformSplit FieldMetadata counters long. (Bruno Roustant)
+* LUCENE-9200: Fix TieredMergePolicy to use double (not float) math to make its merging decisions, fixing
+ a corner-case bug uncovered by fun randomized tests (Robert Muir, Mike McCandless)
+
Other
---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java
index dc8a1d3..be535a7 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java
@@ -558,7 +558,7 @@ public abstract class MergePolicy {
long byteSize = info.sizeInBytes();
int delCount = mergeContext.numDeletesToMerge(info);
assert assertDelCount(delCount, info);
- double delRatio = info.info.maxDoc() <= 0 ? 0.0f : (float) delCount / (float) info.info.maxDoc();
+ double delRatio = info.info.maxDoc() <= 0 ? 0d : (double) delCount / (double) info.info.maxDoc();
assert delRatio <= 1.0;
return (info.info.maxDoc() <= 0 ? byteSize : (long) (byteSize * (1.0 - delRatio)));
}
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java b/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java
index 0285f65..2109cd5 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java
@@ -23,6 +23,7 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import org.apache.lucene.analysis.MockAnalyzer;
@@ -57,7 +58,7 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
totalMaxDoc += sci.info.maxDoc();
long byteSize = sci.sizeInBytes();
double liveRatio = 1 - (double) sci.getDelCount() / sci.info.maxDoc();
- long weightedByteSize = Math.round(liveRatio * byteSize);
+ long weightedByteSize = (long) (liveRatio * byteSize);
totalBytes += weightedByteSize;
minSegmentBytes = Math.min(minSegmentBytes, weightedByteSize);
}
@@ -66,26 +67,38 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
assertTrue("Percentage of deleted docs " + delPercentage + " is larger than the target: " + tmp.getDeletesPctAllowed(),
delPercentage <= tmp.getDeletesPctAllowed());
- long levelSize = Math.max(minSegmentBytes, (long) (tmp.getFloorSegmentMB() * 1024 * 1024));
+ long levelSizeBytes = Math.max(minSegmentBytes, (long) (tmp.getFloorSegmentMB() * 1024 * 1024));
long bytesLeft = totalBytes;
double allowedSegCount = 0;
// below we make the assumption that segments that reached the max segment
// size divided by 2 don't need merging anymore
int mergeFactor = (int) Math.min(tmp.getSegmentsPerTier(), tmp.getMaxMergeAtOnce());
while (true) {
- final double segCountLevel = bytesLeft / (double) levelSize;
- if (segCountLevel < tmp.getSegmentsPerTier() || levelSize >= maxMergedSegmentBytes / 2) {
+ final double segCountLevel = bytesLeft / (double) levelSizeBytes;
+ if (segCountLevel < tmp.getSegmentsPerTier() || levelSizeBytes >= maxMergedSegmentBytes / 2) {
allowedSegCount += Math.ceil(segCountLevel);
break;
}
allowedSegCount += tmp.getSegmentsPerTier();
- bytesLeft -= tmp.getSegmentsPerTier() * levelSize;
- levelSize = Math.min(levelSize * mergeFactor, maxMergedSegmentBytes / 2);
+ bytesLeft -= tmp.getSegmentsPerTier() * levelSizeBytes;
+ levelSizeBytes = Math.min(levelSizeBytes * mergeFactor, maxMergedSegmentBytes / 2);
}
allowedSegCount = Math.max(allowedSegCount, tmp.getSegmentsPerTier());
int numSegments = infos.asList().size();
- assertTrue("numSegments=" + numSegments + ", allowed=" + allowedSegCount, numSegments <= allowedSegCount);
+ assertTrue(String.format(Locale.ROOT,
+ "mergeFactor=%d minSegmentBytes=%,d maxMergedSegmentBytes=%,d segmentsPerTier=%g maxMergeAtOnce=%d numSegments=%d allowed=%g totalBytes=%,d delPercentage=%g deletesPctAllowed=%g",
+ mergeFactor,
+ minSegmentBytes,
+ maxMergedSegmentBytes,
+ tmp.getSegmentsPerTier(),
+ tmp.getMaxMergeAtOnce(),
+ numSegments,
+ allowedSegCount,
+ totalBytes,
+ delPercentage,
+ tmp.getDeletesPctAllowed()),
+ numSegments <= allowedSegCount);
}
@Override
@@ -710,13 +723,13 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
doTestSimulateAppendOnly(mergePolicy, 100_000_000, 10_000);
}
- @Override @Nightly
- // TODO: this test has bugs that prevent you from lowering the number of docs in the test!
+ @Override
public void testSimulateUpdates() throws IOException {
TieredMergePolicy mergePolicy = mergePolicy();
// Avoid low values of the max merged segment size which prevent this merge policy from scaling well
mergePolicy.setMaxMergedSegmentMB(TestUtil.nextInt(random(), 1024, 10 * 1024));
- doTestSimulateUpdates(mergePolicy, 10_000_000, 2500);
+ int numDocs = TEST_NIGHTLY ? atLeast(10_000_000) : atLeast(1_000_000);
+ doTestSimulateUpdates(mergePolicy, numDocs, 2500);
}
}
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseMergePolicyTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseMergePolicyTestCase.java
index 7fc3adc..9928c84 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseMergePolicyTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseMergePolicyTestCase.java
@@ -404,7 +404,14 @@ public abstract class BaseMergePolicyTestCase extends LuceneTestCase {
SegmentInfos segmentInfos = new SegmentInfos(Version.LATEST.major);
final double avgDocSizeMB = 5. / 1024; // 5kB
for (int numDocs = 0; numDocs < totalDocs; ) {
- int flushDocCount = TestUtil.nextInt(random(), 1, maxDocsPerFlush);
+ final int flushDocCount;
+ if (usually()) {
+ // reasonable value
+ flushDocCount = TestUtil.nextInt(random(), maxDocsPerFlush/2, maxDocsPerFlush);
+ } else {
+ // crazy value
+ flushDocCount = TestUtil.nextInt(random(), 1, maxDocsPerFlush);
+ }
// how many of these documents are actually updates
int delCount = (int) (flushDocCount * 0.9 * numDocs / totalDocs);
numDocs += flushDocCount - delCount;