Posted to commits@lucene.apache.org by mi...@apache.org on 2020/02/05 15:02:06 UTC

[lucene-solr] branch branch_8x updated: LUCENE-9200: consistently use double (not float) math for TieredMergePolicy's decisions, to fix a corner-case bug uncovered by randomized tests

This is an automated email from the ASF dual-hosted git repository.

mikemccand pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new 3e63cd3  LUCENE-9200: consistently use double (not float) math for TieredMergePolicy's decisions, to fix a corner-case bug uncovered by randomized tests
3e63cd3 is described below

commit 3e63cd38ef0e5c70c2644322935e61e46b22263f
Author: Mike McCandless <mi...@apache.org>
AuthorDate: Wed Feb 5 09:51:31 2020 -0500

    LUCENE-9200: consistently use double (not float) math for TieredMergePolicy's decisions, to fix a corner-case bug uncovered by randomized tests
---
 lucene/CHANGES.txt                                 |  3 ++
 .../java/org/apache/lucene/index/MergePolicy.java  |  2 +-
 .../apache/lucene/index/TestTieredMergePolicy.java | 33 +++++++++++++++-------
 .../lucene/index/BaseMergePolicyTestCase.java      |  9 +++++-
 4 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 5569d68..6a28efe 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -81,6 +81,9 @@ Bug Fixes
 
 * LUCENE-9135: Make UniformSplit FieldMetadata counters long. (Bruno Roustant)
 
+* LUCENE-9200: Fix TieredMergePolicy to use double (not float) math to make its merging decisions, fixing
+  a corner-case bug uncovered by fun randomized tests (Robert Muir, Mike McCandless)
+
 Other
 ---------------------
 
diff --git a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java
index dc8a1d3..be535a7 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MergePolicy.java
@@ -558,7 +558,7 @@ public abstract class MergePolicy {
     long byteSize = info.sizeInBytes();
     int delCount = mergeContext.numDeletesToMerge(info);
     assert assertDelCount(delCount, info);
-    double delRatio = info.info.maxDoc() <= 0 ? 0.0f : (float) delCount / (float) info.info.maxDoc();
+    double delRatio = info.info.maxDoc() <= 0 ? 0d : (double) delCount / (double) info.info.maxDoc();
     assert delRatio <= 1.0;
     return (info.info.maxDoc() <= 0 ? byteSize : (long) (byteSize * (1.0 - delRatio)));
   }
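
To see why the float math mattered: for a large segment whose deletion count is close to maxDoc, rounding both counts to 24-bit float precision can collapse them to the same float value, so delRatio comes out as exactly 1.0 and the weighted size as 0 even though live documents remain. A standalone sketch of that failure mode (illustrative numbers only, not necessarily the exact case the randomized test hit):

    public class FloatRatioDemo {
      public static void main(String[] args) {
        // Hypothetical segment: 100,000,002 docs, all but one deleted.
        int maxDoc = 100_000_002;
        int delCount = 100_000_001;
        long byteSize = 1_000_000_000L;

        // Near 1e8, adjacent floats are 8 apart, so both counts round to
        // the same float (1.0E8f) and the float ratio is exactly 1.0.
        double floatRatio = (float) delCount / (float) maxDoc;    // 1.0
        double doubleRatio = (double) delCount / (double) maxDoc; // 0.99999999...

        System.out.println((long) (byteSize * (1.0 - floatRatio)));  // 0: segment looks empty
        System.out.println((long) (byteSize * (1.0 - doubleRatio))); // 9: live bytes remain
      }
    }

With float math the policy could also disagree, in the last bits, with the test framework's double-math expectation of the same ratio; computing everything in double keeps the two consistent.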
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java b/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java
index 0285f65..2109cd5 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java
@@ -23,6 +23,7 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -57,7 +58,7 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
       totalMaxDoc += sci.info.maxDoc();
       long byteSize = sci.sizeInBytes();
       double liveRatio = 1 - (double) sci.getDelCount() / sci.info.maxDoc();
-      long weightedByteSize = Math.round(liveRatio * byteSize);
+      long weightedByteSize = (long) (liveRatio * byteSize);
       totalBytes += weightedByteSize;
       minSegmentBytes = Math.min(minSegmentBytes, weightedByteSize);
     }
@@ -66,26 +67,38 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
     assertTrue("Percentage of deleted docs " + delPercentage + " is larger than the target: " + tmp.getDeletesPctAllowed(),
         delPercentage <= tmp.getDeletesPctAllowed());
 
-    long levelSize = Math.max(minSegmentBytes, (long) (tmp.getFloorSegmentMB() * 1024 * 1024));
+    long levelSizeBytes = Math.max(minSegmentBytes, (long) (tmp.getFloorSegmentMB() * 1024 * 1024));
     long bytesLeft = totalBytes;
     double allowedSegCount = 0;
     // below we make the assumption that segments that reached the max segment
     // size divided by 2 don't need merging anymore
     int mergeFactor = (int) Math.min(tmp.getSegmentsPerTier(), tmp.getMaxMergeAtOnce());
     while (true) {
-      final double segCountLevel = bytesLeft / (double) levelSize;
-      if (segCountLevel < tmp.getSegmentsPerTier() || levelSize >= maxMergedSegmentBytes / 2) {
+      final double segCountLevel = bytesLeft / (double) levelSizeBytes;
+      if (segCountLevel < tmp.getSegmentsPerTier() || levelSizeBytes >= maxMergedSegmentBytes / 2) {
         allowedSegCount += Math.ceil(segCountLevel);
         break;
       }
       allowedSegCount += tmp.getSegmentsPerTier();
-      bytesLeft -= tmp.getSegmentsPerTier() * levelSize;
-      levelSize = Math.min(levelSize * mergeFactor, maxMergedSegmentBytes / 2);
+      bytesLeft -= tmp.getSegmentsPerTier() * levelSizeBytes;
+      levelSizeBytes = Math.min(levelSizeBytes * mergeFactor, maxMergedSegmentBytes / 2);
     }
     allowedSegCount = Math.max(allowedSegCount, tmp.getSegmentsPerTier());
 
     int numSegments = infos.asList().size();
-    assertTrue("numSegments=" + numSegments + ", allowed=" + allowedSegCount, numSegments <= allowedSegCount);
+    assertTrue(String.format(Locale.ROOT,
+                             "mergeFactor=%d minSegmentBytes=%,d maxMergedSegmentBytes=%,d segmentsPerTier=%g maxMergeAtOnce=%d numSegments=%d allowed=%g totalBytes=%,d delPercentage=%g deletesPctAllowed=%g",
+                             mergeFactor,
+                             minSegmentBytes,
+                             maxMergedSegmentBytes,
+                             tmp.getSegmentsPerTier(),
+                             tmp.getMaxMergeAtOnce(),
+                             numSegments,
+                             allowedSegCount,
+                             totalBytes,
+                             delPercentage,
+                             tmp.getDeletesPctAllowed()),
+                             numSegments <= allowedSegCount);
   }
 
   @Override
@@ -710,13 +723,13 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
     doTestSimulateAppendOnly(mergePolicy, 100_000_000, 10_000);
   }
 
-  @Override @Nightly
-  // TODO: this test has bugs that prevent you from lowering the number of docs in the test!
+  @Override
   public void testSimulateUpdates() throws IOException {
     TieredMergePolicy mergePolicy = mergePolicy();
     // Avoid low values of the max merged segment size which prevent this merge policy from scaling well
     mergePolicy.setMaxMergedSegmentMB(TestUtil.nextInt(random(), 1024, 10 * 1024));
-    doTestSimulateUpdates(mergePolicy, 10_000_000, 2500);
+    int numDocs = TEST_NIGHTLY ? atLeast(10_000_000) : atLeast(1_000_000);
+    doTestSimulateUpdates(mergePolicy, numDocs, 2500);
   }
 
 }
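
The rewritten budget loop above (levelSize renamed to levelSizeBytes for clarity) re-derives, tier by tier, how many segments TieredMergePolicy should allow: each tier holds segmentsPerTier segments, each level a mergeFactor times larger than the last, until segments reach half the max merged size and are assumed never to merge again. A self-contained run of the same loop with made-up inputs, to show how the budget accumulates:

    public class TierBudgetDemo {
      public static void main(String[] args) {
        // Illustrative inputs: 2 MB floor, 10 segments per tier, merge
        // factor 10, 5 GB max merged segment, 10 GB of index data.
        long levelSizeBytes = 2L << 20;
        long bytesLeft = 10L << 30;
        double segmentsPerTier = 10;
        int mergeFactor = 10;
        long maxMergedSegmentBytes = 5L << 30;

        double allowedSegCount = 0;
        while (true) {
          double segCountLevel = bytesLeft / (double) levelSizeBytes;
          // Segments at or above half the max merged size are assumed
          // never to be merged again, so stop growing the level there.
          if (segCountLevel < segmentsPerTier || levelSizeBytes >= maxMergedSegmentBytes / 2) {
            allowedSegCount += Math.ceil(segCountLevel);
            break;
          }
          allowedSegCount += segmentsPerTier;  // one full tier at this level
          bytesLeft -= segmentsPerTier * levelSizeBytes;
          levelSizeBytes = Math.min(levelSizeBytes * mergeFactor, maxMergedSegmentBytes / 2);
        }
        allowedSegCount = Math.max(allowedSegCount, segmentsPerTier);
        // Tiers of 10 x 2 MB, 10 x 20 MB and 10 x 200 MB consume 2220 MB,
        // leaving 8020 MB at the 2000 MB level: ceil(4.01) = 5 more
        // segments, for an allowed budget of 35.0.
        System.out.println("allowedSegCount = " + allowedSegCount);
      }
    }

The new String.format message prints all of these inputs whenever the assertion trips, which makes a failing random seed much easier to reason about.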
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseMergePolicyTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseMergePolicyTestCase.java
index 7fc3adc..9928c84 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseMergePolicyTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseMergePolicyTestCase.java
@@ -404,7 +404,14 @@ public abstract class BaseMergePolicyTestCase extends LuceneTestCase {
     SegmentInfos segmentInfos = new SegmentInfos(Version.LATEST.major);
     final double avgDocSizeMB = 5. / 1024; // 5kB
     for (int numDocs = 0; numDocs < totalDocs; ) {
-      int flushDocCount = TestUtil.nextInt(random(), 1, maxDocsPerFlush);
+      final int flushDocCount;
+      if (usually()) {
+        // reasonable value
+        flushDocCount = TestUtil.nextInt(random(), maxDocsPerFlush/2, maxDocsPerFlush);
+      } else {
+        // crazy value
+        flushDocCount = TestUtil.nextInt(random(), 1, maxDocsPerFlush);
+      }
       // how many of these documents are actually updates
       int delCount = (int) (flushDocCount * 0.9 * numDocs / totalDocs);
       numDocs += flushDocCount - delCount;
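
The usually()/else split above biases flushes toward realistic, near-full buffers while still occasionally producing degenerate sizes, so the simulated index mostly resembles real usage without losing corner-case coverage. A rough standalone equivalent using plain java.util.Random (the ~90/10 odds are an assumption for illustration; LuceneTestCase's actual usually() probability varies with the test's randomization settings):

    import java.util.Random;

    public class BiasedFlushSizes {
      // "Reasonable" half-to-full flush most of the time, anything in
      // [1, max] the rest of the time, mimicking the usually()/else split.
      static int nextFlushDocCount(Random random, int maxDocsPerFlush) {
        if (random.nextInt(10) != 0) { // ~90% of the time: reasonable value
          int min = maxDocsPerFlush / 2;
          return min + random.nextInt(maxDocsPerFlush - min + 1);
        } else {                       // ~10% of the time: crazy value
          return 1 + random.nextInt(maxDocsPerFlush);
        }
      }

      public static void main(String[] args) {
        Random random = new Random(42);
        for (int i = 0; i < 5; i++) {
          System.out.println(nextFlushDocCount(random, 10_000));
        }
      }
    }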