You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2022/06/09 15:48:11 UTC

[lucene] branch branch_9x updated: LUCENE-10599: Improve LogMergePolicy's handling of maxMergeSize. (#935)

This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new 26e3bbc7d21 LUCENE-10599: Improve LogMergePolicy's handling of maxMergeSize. (#935)
26e3bbc7d21 is described below

commit 26e3bbc7d21e36f51cee81748d7c5e0222c3e86d
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Thu Jun 9 17:43:19 2022 +0200

    LUCENE-10599: Improve LogMergePolicy's handling of maxMergeSize. (#935)
    
    With this change, segments are more likely to be considered for merging until
    they reach the max merge size. Before this change, LogMergePolicy would exclude
    an entire window of `mergeFactor` segments from merging if this window had a
    too large segment and other segments were on the same tier.
---
 lucene/CHANGES.txt                                 |  4 +-
 .../org/apache/lucene/index/LogMergePolicy.java    | 40 ++++++++++------
 .../apache/lucene/index/TestLogMergePolicy.java    | 56 +++++++++++++++++++++-
 3 files changed, 83 insertions(+), 17 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 7cfcd0f9e93..a1a8ac3f38f 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -44,7 +44,9 @@ Optimizations
 
 Bug Fixes
 ---------------------
-(No changes)
+
+* LUCENE-10599: LogMergePolicy is more likely to keep merging segments until
+  they reach the maximum merge size. (Adrien Grand)
 
 Other
 ---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java
index 1d80ba6dfd6..230f146a671 100644
--- a/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java
+++ b/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java
@@ -579,23 +579,41 @@ public abstract class LogMergePolicy extends MergePolicy {
       // Finally, record all merges that are viable at this level:
       int end = start + mergeFactor;
       while (end <= 1 + upto) {
-        boolean anyTooLarge = false;
         boolean anyMerging = false;
+        long mergeSize = 0;
+        long mergeDocs = 0;
         for (int i = start; i < end; i++) {
           final SegmentInfoAndLevel segLevel = levels.get(i);
           final SegmentCommitInfo info = segLevel.info;
-          anyTooLarge |=
-              (size(info, mergeContext) >= maxMergeSize
-                  || sizeDocs(info, mergeContext) >= maxMergeDocs);
           if (mergingSegments.contains(info)) {
             anyMerging = true;
             break;
           }
+          long segmentSize = size(info, mergeContext);
+          long segmentDocs = sizeDocs(info, mergeContext);
+          if (mergeSize + segmentSize > maxMergeSize || mergeDocs + segmentDocs > maxMergeDocs) {
+            // This merge is full, stop adding more segments to it
+            if (i == start) {
+              // This segment alone is too large, return a singleton merge
+              if (verbose(mergeContext)) {
+                message(
+                    "    " + i + " is larger than the max merge size/docs; ignoring", mergeContext);
+              }
+              end = i + 1;
+            } else {
+              // Previous segments are under the max merge size, return them
+              end = i;
+            }
+            break;
+          }
+          mergeSize += segmentSize;
+          mergeDocs += segmentDocs;
         }
 
-        if (anyMerging) {
-          // skip
-        } else if (!anyTooLarge) {
+        if (anyMerging || end - start <= 1) {
+          // skip: there is an ongoing merge at the current level or the computed merge has a single
+          // segment and this merge policy doesn't do singleton merges
+        } else {
           if (spec == null) {
             spec = new MergeSpecification();
           }
@@ -615,14 +633,6 @@ public abstract class LogMergePolicy extends MergePolicy {
                 mergeContext);
           }
           spec.add(new OneMerge(mergeInfos));
-        } else if (verbose(mergeContext)) {
-          message(
-              "    "
-                  + start
-                  + " to "
-                  + end
-                  + ": contains segment over maxMergeSize or maxMergeDocs; skipping",
-              mergeContext);
         }
 
         start = end;
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestLogMergePolicy.java b/lucene/core/src/test/org/apache/lucene/index/TestLogMergePolicy.java
index dce1e5acb17..0b00a20a17f 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestLogMergePolicy.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestLogMergePolicy.java
@@ -51,7 +51,7 @@ public class TestLogMergePolicy extends BaseMergePolicyTestCase {
   protected void assertMerge(MergePolicy policy, MergeSpecification merge) throws IOException {
     LogMergePolicy lmp = (LogMergePolicy) policy;
     for (OneMerge oneMerge : merge.merges) {
-      assertEquals(lmp.getMergeFactor(), oneMerge.segments.size());
+      assertTrue(oneMerge.segments.size() <= lmp.getMergeFactor());
     }
   }
 
@@ -188,6 +188,60 @@ public class TestLogMergePolicy extends BaseMergePolicyTestCase {
     assertEquals(10, segmentInfos.info(1).info.maxDoc());
   }
 
+  public void testPackLargeSegments() throws IOException {
+    LogDocMergePolicy mergePolicy = new LogDocMergePolicy();
+    IOStats stats = new IOStats();
+    mergePolicy.setMaxMergeDocs(10_000);
+    AtomicLong segNameGenerator = new AtomicLong();
+    MergeContext mergeContext = new MockMergeContext(SegmentCommitInfo::getDelCount);
+    SegmentInfos segmentInfos = new SegmentInfos(Version.LATEST.major);
+    // 10 segments below the max segment size, but larger than maxMergeSize/mergeFactor
+    for (int i = 0; i < 10; ++i) {
+      segmentInfos.add(
+          makeSegmentCommitInfo(
+              "_" + segNameGenerator.getAndIncrement(), 3_000, 0, 0, IndexWriter.SOURCE_MERGE));
+    }
+    MergeSpecification spec =
+        mergePolicy.findMerges(MergeTrigger.EXPLICIT, segmentInfos, mergeContext);
+    assertNotNull(spec);
+    for (OneMerge oneMerge : spec.merges) {
+      segmentInfos =
+          applyMerge(segmentInfos, oneMerge, "_" + segNameGenerator.getAndIncrement(), stats);
+    }
+    // LogMP packed 3 3k segments together
+    assertEquals(9_000, segmentInfos.info(0).info.maxDoc());
+  }
+
+  public void testIgnoreLargeSegments() throws IOException {
+    LogDocMergePolicy mergePolicy = new LogDocMergePolicy();
+    IOStats stats = new IOStats();
+    mergePolicy.setMaxMergeDocs(10_000);
+    AtomicLong segNameGenerator = new AtomicLong();
+    MergeContext mergeContext = new MockMergeContext(SegmentCommitInfo::getDelCount);
+    SegmentInfos segmentInfos = new SegmentInfos(Version.LATEST.major);
+    // 1 segment that reached the maximum segment size
+    segmentInfos.add(
+        makeSegmentCommitInfo(
+            "_" + segNameGenerator.getAndIncrement(), 11_000, 0, 0, IndexWriter.SOURCE_MERGE));
+    // and 10 segments below the max segment size, but within the same level
+    for (int i = 0; i < 10; ++i) {
+      segmentInfos.add(
+          makeSegmentCommitInfo(
+              "_" + segNameGenerator.getAndIncrement(), 2_000, 0, 0, IndexWriter.SOURCE_MERGE));
+    }
+    // LogMergePolicy used to have a bug that would make it exclude the first mergeFactor segments
+    // from merging if any of them was above the maximum merged size
+    MergeSpecification spec =
+        mergePolicy.findMerges(MergeTrigger.EXPLICIT, segmentInfos, mergeContext);
+    assertNotNull(spec);
+    for (OneMerge oneMerge : spec.merges) {
+      segmentInfos =
+          applyMerge(segmentInfos, oneMerge, "_" + segNameGenerator.getAndIncrement(), stats);
+    }
+    assertEquals(11_000, segmentInfos.info(0).info.maxDoc());
+    assertEquals(10_000, segmentInfos.info(1).info.maxDoc());
+  }
+
   public void testFullFlushMerges() throws IOException {
     AtomicLong segNameGenerator = new AtomicLong();
     IOStats stats = new IOStats();