You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by pa...@apache.org on 2022/11/22 07:52:46 UTC

[lucene] branch main updated: Support deletions in rearrange (#11815)

This is an automated email from the ASF dual-hosted git repository.

patrickz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 369a70f2894 Support deletions in rearrange (#11815)
369a70f2894 is described below

commit 369a70f28948e748ed157c0731f36c367c4a03fa
Author: Stefan Vodita <41...@users.noreply.github.com>
AuthorDate: Tue Nov 22 07:52:38 2022 +0000

    Support deletions in rearrange (#11815)
    
    * Support deletions in rearrange
    * Store BinaryDocValues in the binary doc value selector as BytesRef
       instead of String.
---
 lucene/CHANGES.txt                                 |   2 +
 .../lucene/misc/index/BinaryDocValueSelector.java  |  83 +++++--
 .../apache/lucene/misc/index/IndexRearranger.java  | 168 ++++++++++---
 ...ranger.java => TestBinaryDocValueSelector.java} | 104 ++-------
 .../lucene/misc/index/TestIndexRearranger.java     | 260 +++++++++++++++------
 5 files changed, 414 insertions(+), 203 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 5257c959e32..45b1c4944fb 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -37,6 +37,8 @@ API Changes
 * GITHUB#11840: Query rewrite now takes an IndexSearcher instead of IndexReader to enable concurrent
   rewriting. (Patrick Zhai)
 
+* GITHUB#11814: Support deletions in IndexRearranger. (Stefan Vodita)
+
 New Features
 ---------------------
 
diff --git a/lucene/misc/src/java/org/apache/lucene/misc/index/BinaryDocValueSelector.java b/lucene/misc/src/java/org/apache/lucene/misc/index/BinaryDocValueSelector.java
index 082675f4dcc..c1a54b72476 100644
--- a/lucene/misc/src/java/org/apache/lucene/misc/index/BinaryDocValueSelector.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/index/BinaryDocValueSelector.java
@@ -22,6 +22,7 @@ import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.CodecReader;
 import org.apache.lucene.index.DirectoryReader;
@@ -30,52 +31,94 @@ import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BitSet;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
 
-/** Select documents using binary doc values */
+/**
+ * Use this selector to rearrange an index where documents can be uniquely identified based on
+ * {@link BinaryDocValues}
+ */
 public class BinaryDocValueSelector implements IndexRearranger.DocumentSelector, Serializable {
 
   private final String field;
-  private final HashSet<String> keySet;
+  private final Set<BytesRef> keySet;
 
-  public BinaryDocValueSelector(String field, HashSet<String> keySet) {
+  public BinaryDocValueSelector(String field, Set<BytesRef> keySet) {
     this.field = field;
     this.keySet = keySet;
   }
 
   @Override
-  public BitSet getFilteredLiveDocs(CodecReader reader) throws IOException {
+  public BitSet getFilteredDocs(CodecReader reader) throws IOException {
     BinaryDocValues binaryDocValues = reader.getBinaryDocValues(field);
-    Bits oldLiveDocs = reader.getLiveDocs();
     FixedBitSet bits = new FixedBitSet(reader.maxDoc());
-    for (int i = 0; i < reader.maxDoc(); i++) {
-      if (oldLiveDocs != null && oldLiveDocs.get(i) == false) {
-        continue;
-      }
-      if (binaryDocValues.advanceExact(i)
-          && keySet.contains(binaryDocValues.binaryValue().utf8ToString())) {
-        bits.set(i);
+    for (int docid = 0; docid < reader.maxDoc(); docid++) {
+      if (binaryDocValues.advanceExact(docid) && keySet.contains(binaryDocValues.binaryValue())) {
+        bits.set(docid);
       }
     }
     return bits;
   }
 
-  public static List<IndexRearranger.DocumentSelector> createFromExistingIndex(
+  /**
+   * Create a selector for the deletes in an index, which can then be applied to a rearranged index
+   *
+   * @param field tells which {@link BinaryDocValues} are the unique key
+   * @param directory where the original index is present
+   * @return a deletes selector to be passed to {@link IndexRearranger}
+   */
+  public static IndexRearranger.DocumentSelector createDeleteSelectorFromIndex(
       String field, Directory directory) throws IOException {
-    List<IndexRearranger.DocumentSelector> selectors = new ArrayList<>();
+
+    Set<BytesRef> keySet = new HashSet<>();
+
     try (IndexReader reader = DirectoryReader.open(directory)) {
       for (LeafReaderContext context : reader.leaves()) {
-        HashSet<String> keySet = new HashSet<>();
         Bits liveDocs = context.reader().getLiveDocs();
+        if (liveDocs == null) {
+          continue;
+        }
+
         BinaryDocValues binaryDocValues = context.reader().getBinaryDocValues(field);
-        for (int i = 0; i < context.reader().maxDoc(); i++) {
-          if (liveDocs != null && liveDocs.get(i) == false) {
+        for (int docid = 0; docid < context.reader().maxDoc(); docid++) {
+          if (liveDocs.get(docid) == true) {
             continue;
           }
-          if (binaryDocValues.advanceExact(i)) {
-            keySet.add(binaryDocValues.binaryValue().utf8ToString());
+
+          if (binaryDocValues.advanceExact(docid)) {
+            keySet.add(BytesRef.deepCopyOf(binaryDocValues.binaryValue()));
+          } else {
+            throw new AssertionError("Document " + docid + " doesn't have key " + field);
+          }
+        }
+      }
+    }
+    return new BinaryDocValueSelector(field, keySet);
+  }
+
+  /**
+   * Create a list of selectors that will reproduce the index geometry when used with {@link
+   * IndexRearranger}
+   *
+   * @param field tells which {@link BinaryDocValues} are the unique key
+   * @param directory where the original index is present
+   * @return a list of selectors to be passed to {@link IndexRearranger}
+   */
+  public static List<IndexRearranger.DocumentSelector> createLiveSelectorsFromIndex(
+      String field, Directory directory) throws IOException {
+
+    List<IndexRearranger.DocumentSelector> selectors = new ArrayList<>();
+
+    try (IndexReader reader = DirectoryReader.open(directory)) {
+      for (LeafReaderContext context : reader.leaves()) {
+        Set<BytesRef> keySet = new HashSet<>();
+        BinaryDocValues binaryDocValues = context.reader().getBinaryDocValues(field);
+
+        for (int docid = 0; docid < context.reader().maxDoc(); docid++) {
+          if (binaryDocValues.advanceExact(docid)) {
+            keySet.add(BytesRef.deepCopyOf(binaryDocValues.binaryValue()));
           } else {
-            throw new AssertionError("Document don't have selected key");
+            throw new AssertionError("Document " + docid + " doesn't have key " + field);
           }
         }
         selectors.add(new BinaryDocValueSelector(field, keySet));
diff --git a/lucene/misc/src/java/org/apache/lucene/misc/index/IndexRearranger.java b/lucene/misc/src/java/org/apache/lucene/misc/index/IndexRearranger.java
index c05a1a7b959..7c3bd53cece 100644
--- a/lucene/misc/src/java/org/apache/lucene/misc/index/IndexRearranger.java
+++ b/lucene/misc/src/java/org/apache/lucene/misc/index/IndexRearranger.java
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
@@ -43,7 +44,20 @@ import org.apache.lucene.util.NamedThreadFactory;
 
 /**
  * Copy and rearrange index according to document selectors, from input dir to output dir. Length of
- * documentSelectors determines how many segments there will be
+ * documentSelectors determines how many segments there will be.
+ *
+ * <p>Rearranging works in 3 steps: 1. Assume all docs in the original index are live and create the
+ * rearranged index using the segment selectors. 2. Reorder the segments to match the order of the
+ * selectors and check the validity of the rearranged index. 3. Go through the rearranged index and
+ * apply deletes requested by the deletes selector.
+ *
+ * <p>NB: You can't produce segments that only contain deletes. If you select all documents in a
+ * segment for deletion, the entire segment will be discarded.
+ *
+ * <p>Example use case: You are testing search performance after a change to indexing. You can index
+ * the same content using the old and new indexers and then rearrange one of them to the shape of
+ * the other. Using rearrange will give more accurate measurements, since you will not be
+ * introducing noise from index geometry.
  *
  * <p>TODO: another possible (faster) approach to do this is to manipulate FlushPolicy and
  * MergePolicy at indexing time to create small desired segments first and merge them accordingly
@@ -54,59 +68,89 @@ import org.apache.lucene.util.NamedThreadFactory;
 public class IndexRearranger {
   protected final Directory input, output;
   protected final IndexWriterConfig config;
-  protected final List<DocumentSelector> documentSelectors;
+
+  // Each of these selectors will produce a segment in the rearranged index.
+  // The segments will appear in the index in the order of the selectors that produced them.
+  protected final List<DocumentSelector> segmentSelectors;
+
+  // Documents selected here will be marked for deletion in the rearranged index, but not merged
+  // away.
+  protected final DocumentSelector deletedDocsSelector;
 
   /**
-   * Constructor
+   * All args constructor
    *
    * @param input input dir
    * @param output output dir
    * @param config index writer config
-   * @param documentSelectors specify what document is desired in the rearranged index segments,
-   *     each selector correspond to one segment
+   * @param segmentSelectors specify which documents are desired in the rearranged index segments;
+   *     each selector corresponds to one segment
+   * @param deletedDocsSelector specify which documents are to be marked for deletion in the
+   *     rearranged index; this selector should be thread-safe
    */
   public IndexRearranger(
       Directory input,
       Directory output,
       IndexWriterConfig config,
-      List<DocumentSelector> documentSelectors) {
+      List<DocumentSelector> segmentSelectors,
+      DocumentSelector deletedDocsSelector) {
     this.input = input;
     this.output = output;
     this.config = config;
-    this.documentSelectors = documentSelectors;
+    this.segmentSelectors = segmentSelectors;
+    this.deletedDocsSelector = deletedDocsSelector;
+  }
+
+  /** Constructor with no deletes to apply */
+  public IndexRearranger(
+      Directory input,
+      Directory output,
+      IndexWriterConfig config,
+      List<DocumentSelector> segmentSelectors) {
+    this(input, output, config, segmentSelectors, null);
   }
 
   public void execute() throws Exception {
-    config.setMergePolicy(
-        NoMergePolicy.INSTANCE); // do not merge since one addIndexes call create one segment
-    try (IndexWriter writer = new IndexWriter(output, config);
+    ExecutorService executor =
+        Executors.newFixedThreadPool(
+            Math.min(Runtime.getRuntime().availableProcessors(), segmentSelectors.size()),
+            new NamedThreadFactory("rearranger"));
+
+    IndexWriterConfig createSegmentsConfig = new IndexWriterConfig(config.getAnalyzer());
+    IndexWriterConfig applyDeletesConfig = new IndexWriterConfig(config.getAnalyzer());
+
+    // Do not merge - each addIndexes call creates one segment
+    createSegmentsConfig.setMergePolicy(NoMergePolicy.INSTANCE);
+    applyDeletesConfig.setMergePolicy(NoMergePolicy.INSTANCE);
+
+    try (IndexWriter writer = new IndexWriter(output, createSegmentsConfig);
         IndexReader reader = DirectoryReader.open(input)) {
-      ExecutorService executor =
-          Executors.newFixedThreadPool(
-              Math.min(Runtime.getRuntime().availableProcessors(), documentSelectors.size()),
-              new NamedThreadFactory("rearranger"));
-      ArrayList<Future<Void>> futures = new ArrayList<>();
-      for (DocumentSelector record : documentSelectors) {
-        Callable<Void> addSegment =
-            () -> {
-              addOneSegment(writer, reader, record);
-              return null;
-            };
-        futures.add(executor.submit(addSegment));
-      }
-      for (Future<Void> future : futures) {
-        future.get();
-      }
-      executor.shutdown();
+      createRearrangedIndex(writer, reader, segmentSelectors, executor);
     }
+    finalizeRearrange(output, segmentSelectors);
+
+    try (IndexWriter writer = new IndexWriter(output, applyDeletesConfig);
+        IndexReader reader = DirectoryReader.open(writer)) {
+      applyDeletes(writer, reader, deletedDocsSelector, executor);
+    }
+    executor.shutdown();
+  }
+
+  /**
+   * Place segments in the order of their respective selectors and ensure the rearrange was
+   * performed correctly.
+   */
+  private static void finalizeRearrange(Directory output, List<DocumentSelector> segmentSelectors)
+      throws IOException {
     List<SegmentCommitInfo> ordered = new ArrayList<>();
     try (IndexReader reader = DirectoryReader.open(output)) {
-      for (DocumentSelector ds : documentSelectors) {
+      for (DocumentSelector ds : segmentSelectors) {
         int foundLeaf = -1;
         for (LeafReaderContext context : reader.leaves()) {
           SegmentReader sr = (SegmentReader) context.reader();
-          int docFound = ds.getFilteredLiveDocs(sr).nextSetBit(0);
+          int docFound = ds.getFilteredDocs(sr).nextSetBit(0);
           if (docFound != DocIdSetIterator.NO_MORE_DOCS) {
+            // Each document can be mapped to one segment at most
             if (foundLeaf != -1) {
               throw new IllegalStateException(
                   "Document selector "
@@ -129,6 +173,32 @@ public class IndexRearranger {
     sis.commit(output);
   }
 
+  /**
+   * Create the rearranged index as described by the segment selectors. Assume all documents in the
+   * original index are live.
+   */
+  private static void createRearrangedIndex(
+      IndexWriter writer,
+      IndexReader reader,
+      List<DocumentSelector> selectors,
+      ExecutorService executor)
+      throws ExecutionException, InterruptedException {
+
+    ArrayList<Future<Void>> futures = new ArrayList<>();
+    for (DocumentSelector selector : selectors) {
+      Callable<Void> addSegment =
+          () -> {
+            addOneSegment(writer, reader, selector);
+            return null;
+          };
+      futures.add(executor.submit(addSegment));
+    }
+
+    for (Future<Void> future : futures) {
+      future.get();
+    }
+  }
+
   private static void addOneSegment(
       IndexWriter writer, IndexReader reader, DocumentSelector selector) throws IOException {
     CodecReader[] readers = new CodecReader[reader.leaves().size()];
@@ -139,6 +209,42 @@ public class IndexRearranger {
     writer.addIndexes(readers);
   }
 
+  private static void applyDeletes(
+      IndexWriter writer, IndexReader reader, DocumentSelector selector, ExecutorService executor)
+      throws ExecutionException, InterruptedException {
+    if (selector == null) {
+      // There are no deletes to be applied
+      return;
+    }
+
+    ArrayList<Future<Void>> futures = new ArrayList<>();
+    for (LeafReaderContext context : reader.leaves()) {
+      Callable<Void> applyDeletesToSegment =
+          () -> {
+            applyDeletesToOneSegment(writer, (CodecReader) context.reader(), selector);
+            return null;
+          };
+      futures.add(executor.submit(applyDeletesToSegment));
+    }
+
+    for (Future<Void> future : futures) {
+      future.get();
+    }
+  }
+
+  private static void applyDeletesToOneSegment(
+      IndexWriter writer, CodecReader segmentReader, DocumentSelector selector) throws IOException {
+    Bits deletedDocs = selector.getFilteredDocs(segmentReader);
+    for (int docid = 0; docid < segmentReader.maxDoc(); ++docid) {
+      if (deletedDocs.get(docid)) {
+        if (writer.tryDeleteDocument(segmentReader, docid) == -1) {
+          throw new IllegalStateException(
+              "tryDeleteDocument has failed. This should never happen, since merging is disabled.");
+        }
+      }
+    }
+  }
+
   private static class DocSelectorFilteredCodecReader extends FilterCodecReader {
 
     BitSet filteredLiveDocs;
@@ -147,7 +253,7 @@ public class IndexRearranger {
     public DocSelectorFilteredCodecReader(CodecReader in, DocumentSelector selector)
         throws IOException {
       super(in);
-      filteredLiveDocs = selector.getFilteredLiveDocs(in);
+      filteredLiveDocs = selector.getFilteredDocs(in);
       numDocs = filteredLiveDocs.cardinality();
     }
 
@@ -174,6 +280,6 @@ public class IndexRearranger {
 
   /** Select document within a CodecReader */
   public interface DocumentSelector {
-    BitSet getFilteredLiveDocs(CodecReader reader) throws IOException;
+    BitSet getFilteredDocs(CodecReader reader) throws IOException;
   }
 }
diff --git a/lucene/misc/src/test/org/apache/lucene/misc/index/TestIndexRearranger.java b/lucene/misc/src/test/org/apache/lucene/misc/index/TestBinaryDocValueSelector.java
similarity index 54%
copy from lucene/misc/src/test/org/apache/lucene/misc/index/TestIndexRearranger.java
copy to lucene/misc/src/test/org/apache/lucene/misc/index/TestBinaryDocValueSelector.java
index afa75a3f330..4576c571ba3 100644
--- a/lucene/misc/src/test/org/apache/lucene/misc/index/TestIndexRearranger.java
+++ b/lucene/misc/src/test/org/apache/lucene/misc/index/TestBinaryDocValueSelector.java
@@ -18,11 +18,11 @@
 package org.apache.lucene.misc.index;
 
 import java.io.IOException;
-import java.util.List;
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.document.NumericDocValuesField;
-import org.apache.lucene.index.CodecReader;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
@@ -30,54 +30,21 @@ import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.tests.util.LuceneTestCase;
-import org.apache.lucene.util.BitSet;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.FixedBitSet;
-
-public class TestIndexRearranger extends LuceneTestCase {
-  public void testRearrange() throws Exception {
-    Directory inputDir = newDirectory();
-    createIndex(100, 10, inputDir);
-
-    Directory outputDir = newDirectory();
-    IndexRearranger rearranger =
-        new IndexRearranger(
-            inputDir,
-            outputDir,
-            getIndexWriterConfig(),
-            List.of(new OddDocSelector(), new EvenDocSelector()));
-    rearranger.execute();
-    IndexReader reader = DirectoryReader.open(outputDir);
-    assertEquals(2, reader.leaves().size());
-    LeafReader segment1 = reader.leaves().get(0).reader();
-    assertEquals(50, segment1.numDocs());
-    NumericDocValues numericDocValues = segment1.getNumericDocValues("ord");
-    assertTrue(numericDocValues.advanceExact(0));
-    boolean odd = numericDocValues.longValue() % 2 == 1;
-    for (int i = 1; i < 50; i++) {
-      assertTrue(numericDocValues.advanceExact(i));
-      assertEquals(odd, numericDocValues.longValue() % 2 == 1);
-    }
-    LeafReader segment2 = reader.leaves().get(1).reader();
-    assertEquals(50, segment2.numDocs());
-    numericDocValues = segment2.getNumericDocValues("ord");
-    assertTrue(numericDocValues.advanceExact(0));
-    boolean odd2 = numericDocValues.longValue() % 2 == 1;
-    assertTrue(odd != odd2);
-    for (int i = 1; i < 50; i++) {
-      assertTrue(numericDocValues.advanceExact(i));
-      assertEquals(odd2, numericDocValues.longValue() % 2 == 1);
-    }
-    reader.close();
-    inputDir.close();
-    outputDir.close();
-  }
 
+/**
+ * This test creates an index with 100 documents and 10 segments where all the documents have a
+ * binary doc value, "textOrd", which is filled with consecutive integers. The documents with even
+ * "textOrd" are marked for deletion. The index is then rearranged into 4 segments using {@link
+ * BinaryDocValueSelector}.
+ */
+public class TestBinaryDocValueSelector extends LuceneTestCase {
   public void testRearrangeUsingBinaryDocValueSelector() throws Exception {
     Directory srcDir = newDirectory();
     createIndex(100, 10, srcDir);
@@ -93,7 +60,8 @@ public class TestIndexRearranger extends LuceneTestCase {
             inputDir,
             outputDir,
             getIndexWriterConfig(),
-            BinaryDocValueSelector.createFromExistingIndex("textOrd", srcDir));
+            BinaryDocValueSelector.createLiveSelectorsFromIndex("textOrd", srcDir),
+            BinaryDocValueSelector.createDeleteSelectorFromIndex("textOrd", srcDir));
     rearranger.execute();
     assertSequentialIndex(outputDir, 100, 10);
 
@@ -109,11 +77,15 @@ public class TestIndexRearranger extends LuceneTestCase {
     for (int i = 0; i < segNum; i++) {
       LeafReader leafReader = reader.leaves().get(i).reader();
       NumericDocValues numericDocValues = leafReader.getNumericDocValues("ord");
+      Bits liveDocs = leafReader.getLiveDocs();
+      assertNotNull(liveDocs);
 
-      for (int doc = 0; doc < leafReader.numDocs(); doc++) {
+      for (int doc = 0; doc < leafReader.maxDoc(); doc++) {
         assertTrue(numericDocValues.advanceExact(doc));
         assertEquals(numericDocValues.longValue(), lastOrd + 1);
         lastOrd = numericDocValues.longValue();
+
+        assertEquals(liveDocs.get(doc), lastOrd % 2 != 0);
       }
     }
     assertEquals(docNum, lastOrd + 1);
@@ -134,8 +106,12 @@ public class TestIndexRearranger extends LuceneTestCase {
       Document doc = new Document();
       doc.add(new NumericDocValuesField("ord", i));
       doc.add(new BinaryDocValuesField("textOrd", new BytesRef(Integer.toString(i))));
+      if (i % 2 == 0) {
+        doc.add(new StringField("delete", "yes", Field.Store.YES));
+      }
       w.addDocument(doc);
       if (i % docPerSeg == docPerSeg - 1) {
+        w.deleteDocuments(new Term("delete", "yes"));
         w.commit();
       }
     }
@@ -144,42 +120,4 @@ public class TestIndexRearranger extends LuceneTestCase {
     reader.close();
     w.close();
   }
-
-  private static class OddDocSelector implements IndexRearranger.DocumentSelector {
-
-    @Override
-    public BitSet getFilteredLiveDocs(CodecReader reader) throws IOException {
-      FixedBitSet filteredSet = new FixedBitSet(reader.maxDoc());
-      Bits liveDocs = reader.getLiveDocs();
-      NumericDocValues numericDocValues = reader.getNumericDocValues("ord");
-      for (int i = 0; i < reader.maxDoc(); i++) {
-        if (liveDocs != null && liveDocs.get(i) == false) {
-          continue;
-        }
-        if (numericDocValues.advanceExact(i) && numericDocValues.longValue() % 2 == 1) {
-          filteredSet.set(i);
-        }
-      }
-      return filteredSet;
-    }
-  }
-
-  private static class EvenDocSelector implements IndexRearranger.DocumentSelector {
-
-    @Override
-    public BitSet getFilteredLiveDocs(CodecReader reader) throws IOException {
-      FixedBitSet filteredSet = new FixedBitSet(reader.maxDoc());
-      Bits liveDocs = reader.getLiveDocs();
-      NumericDocValues numericDocValues = reader.getNumericDocValues("ord");
-      for (int i = 0; i < reader.maxDoc(); i++) {
-        if (liveDocs != null && liveDocs.get(i) == false) {
-          continue;
-        }
-        if (numericDocValues.advanceExact(i) && numericDocValues.longValue() % 2 == 0) {
-          filteredSet.set(i);
-        }
-      }
-      return filteredSet;
-    }
-  }
 }
diff --git a/lucene/misc/src/test/org/apache/lucene/misc/index/TestIndexRearranger.java b/lucene/misc/src/test/org/apache/lucene/misc/index/TestIndexRearranger.java
index afa75a3f330..3907b19f13c 100644
--- a/lucene/misc/src/test/org/apache/lucene/misc/index/TestIndexRearranger.java
+++ b/lucene/misc/src/test/org/apache/lucene/misc/index/TestIndexRearranger.java
@@ -18,10 +18,12 @@
 package org.apache.lucene.misc.index;
 
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.List;
-import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.CodecReader;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
@@ -30,6 +32,7 @@ import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.store.Directory;
@@ -39,10 +42,47 @@ import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
 
+/**
+ * This test creates the following index:
+ *
+ * <p>segment 0: (ord = 0, live = true), (ord = 1, live = false)
+ *
+ * <p>segment 1: (ord = 2, live = true), (ord = 3, live = false)
+ *
+ * <p>segment 2: (ord = 4, live = true), (ord = 5, live = false)
+ *
+ * <p>It also creates 3 selectors:
+ *
+ * <p>{@link LiveToLiveSelector} selects for ord in {0, 2}
+ *
+ * <p>{@link DeleteToLiveSelector} selects for ord in {3, 5}
+ *
+ * <p>{@link DeleteSelector} selects for ord in {1, 2, 4, 5}
+ */
 public class TestIndexRearranger extends LuceneTestCase {
-  public void testRearrange() throws Exception {
+  /**
+   * Use {@link LiveToLiveSelector} and {@link DeleteToLiveSelector} to rearrange the index into 2
+   * segments:
+   *
+   * <p>segment 0: (ord = 0, live = true), (ord = 2, live = true)
+   *
+   * <p>segment 1: (ord = 3, live = true), (ord = 5, live = true)
+   *
+   * <p>The documents with ord 1 and 4 have now been lost, since they were not selected to be in the
+   * rearranged index. All documents that were selected are now considered live.
+   *
+   * <p>Next, we apply the deletions specified by {@link DeleteSelector}:
+   *
+   * <p>segment 0: (ord = 0, live = true), (ord = 2, live = false)
+   *
+   * <p>segment 1: (ord = 3, live = true), (ord = 5, live = false)
+   *
+   * <p>The documents with ord 2 and 5 have been marked for deletion. Documents 1 and 4 would have
+   * also been marked if they were present.
+   */
+  public void testLiveDeleteCombinations() throws Exception {
     Directory inputDir = newDirectory();
-    createIndex(100, 10, inputDir);
+    createIndex(inputDir);
 
     Directory outputDir = newDirectory();
     IndexRearranger rearranger =
@@ -50,42 +90,55 @@ public class TestIndexRearranger extends LuceneTestCase {
             inputDir,
             outputDir,
             getIndexWriterConfig(),
-            List.of(new OddDocSelector(), new EvenDocSelector()));
+            List.of(
+                new TestIndexRearranger.LiveToLiveSelector(),
+                new TestIndexRearranger.DeleteToLiveSelector()),
+            new TestIndexRearranger.DeleteSelector());
     rearranger.execute();
+
     IndexReader reader = DirectoryReader.open(outputDir);
     assertEquals(2, reader.leaves().size());
+
     LeafReader segment1 = reader.leaves().get(0).reader();
-    assertEquals(50, segment1.numDocs());
+    assertEquals(1, segment1.numDocs());
+    assertEquals(2, segment1.maxDoc());
+    Bits liveDocs = segment1.getLiveDocs();
+    assertNotNull(liveDocs);
     NumericDocValues numericDocValues = segment1.getNumericDocValues("ord");
+    assertNotNull(numericDocValues);
     assertTrue(numericDocValues.advanceExact(0));
-    boolean odd = numericDocValues.longValue() % 2 == 1;
-    for (int i = 1; i < 50; i++) {
-      assertTrue(numericDocValues.advanceExact(i));
-      assertEquals(odd, numericDocValues.longValue() % 2 == 1);
-    }
+    assertTrue(liveDocs.get(0));
+    assertEquals(0, numericDocValues.longValue());
+    assertTrue(numericDocValues.advanceExact(1));
+    assertFalse(liveDocs.get(1));
+    assertEquals(2, numericDocValues.longValue());
+
     LeafReader segment2 = reader.leaves().get(1).reader();
-    assertEquals(50, segment2.numDocs());
+    assertEquals(1, segment2.numDocs());
+    assertEquals(2, segment2.maxDoc());
+    liveDocs = segment1.getLiveDocs();
+    assertNotNull(liveDocs);
     numericDocValues = segment2.getNumericDocValues("ord");
+    assertNotNull(numericDocValues);
+    assertTrue(liveDocs.get(0));
     assertTrue(numericDocValues.advanceExact(0));
-    boolean odd2 = numericDocValues.longValue() % 2 == 1;
-    assertTrue(odd != odd2);
-    for (int i = 1; i < 50; i++) {
-      assertTrue(numericDocValues.advanceExact(i));
-      assertEquals(odd2, numericDocValues.longValue() % 2 == 1);
-    }
+    assertEquals(3, numericDocValues.longValue());
+    assertFalse(liveDocs.get(1));
+    assertTrue(numericDocValues.advanceExact(1));
+    assertEquals(5, numericDocValues.longValue());
+
     reader.close();
     inputDir.close();
     outputDir.close();
   }
 
-  public void testRearrangeUsingBinaryDocValueSelector() throws Exception {
-    Directory srcDir = newDirectory();
-    createIndex(100, 10, srcDir);
-    assertSequentialIndex(srcDir, 100, 10);
-
+  /**
+   * This test arrives at an empty rearranged index by using the same selector for creating segments
+   * and applying deletes.
+   */
+  public void testDeleteEverything() throws Exception {
     Directory inputDir = newDirectory();
-    createIndex(100, 4, inputDir);
-    assertSequentialIndex(inputDir, 100, 4);
+    createIndex(inputDir);
 
     Directory outputDir = newDirectory();
     IndexRearranger rearranger =
@@ -93,31 +146,64 @@ public class TestIndexRearranger extends LuceneTestCase {
             inputDir,
             outputDir,
             getIndexWriterConfig(),
-            BinaryDocValueSelector.createFromExistingIndex("textOrd", srcDir));
+            List.of(new TestIndexRearranger.LiveToLiveSelector()), // segment selector
+            new TestIndexRearranger.LiveToLiveSelector()); // deletes selector: same docs
     rearranger.execute();
-    assertSequentialIndex(outputDir, 100, 10);
 
-    outputDir.close();
+    IndexReader reader = DirectoryReader.open(outputDir);
+    assertEquals(0, reader.leaves().size()); // every selected doc was also deleted
+
+    reader.close();
     inputDir.close();
-    srcDir.close();
+    outputDir.close();
   }
 
-  private static void assertSequentialIndex(Directory dir, int docNum, int segNum)
-      throws IOException {
-    IndexReader reader = DirectoryReader.open(dir);
-    long lastOrd = -1;
-    for (int i = 0; i < segNum; i++) {
-      LeafReader leafReader = reader.leaves().get(i).reader();
-      NumericDocValues numericDocValues = leafReader.getNumericDocValues("ord");
-
-      for (int doc = 0; doc < leafReader.numDocs(); doc++) {
-        assertTrue(numericDocValues.advanceExact(doc));
-        assertEquals(numericDocValues.longValue(), lastOrd + 1);
-        lastOrd = numericDocValues.longValue();
-      }
-    }
-    assertEquals(docNum, lastOrd + 1);
+  /** No deletes selector is passed, so every selected doc is live in the output. */
+  public void testDeleteNothing() throws Exception {
+    Directory inputDir = newDirectory();
+    createIndex(inputDir);
+
+    Directory outputDir = newDirectory();
+    IndexRearranger rearranger =
+        new IndexRearranger(
+            inputDir,
+            outputDir,
+            getIndexWriterConfig(),
+            List.of(
+                new TestIndexRearranger.LiveToLiveSelector(),
+                new TestIndexRearranger.DeleteToLiveSelector()));
+    rearranger.execute();
+
+    IndexReader reader = DirectoryReader.open(outputDir);
+    assertEquals(2, reader.leaves().size()); // each selector produced a segment
+
+    LeafReader segment1 = reader.leaves().get(0).reader();
+    assertEquals(2, segment1.numDocs());
+    assertEquals(2, segment1.maxDoc());
+    Bits liveDocs = segment1.getLiveDocs();
+    assertNull(liveDocs); // null live docs means the segment has no deletions
+    NumericDocValues numericDocValues = segment1.getNumericDocValues("ord");
+    assertNotNull(numericDocValues);
+    assertTrue(numericDocValues.advanceExact(0));
+    assertEquals(0, numericDocValues.longValue());
+    assertTrue(numericDocValues.advanceExact(1));
+    assertEquals(2, numericDocValues.longValue());
+
+    LeafReader segment2 = reader.leaves().get(1).reader();
+    assertEquals(2, segment2.numDocs());
+    assertEquals(2, segment2.maxDoc());
+    liveDocs = segment2.getLiveDocs(); // inspect the second segment, not segment1 again
+    assertNull(liveDocs);
+    numericDocValues = segment2.getNumericDocValues("ord");
+    assertNotNull(numericDocValues);
+    assertTrue(numericDocValues.advanceExact(0));
+    assertEquals(3, numericDocValues.longValue()); // ord 3 and 5 were deleted in the input
+    assertTrue(numericDocValues.advanceExact(1));
+    assertEquals(5, numericDocValues.longValue());
+
     reader.close();
+    inputDir.close();
+    outputDir.close();
   }
 
   private static IndexWriterConfig getIndexWriterConfig() {
@@ -127,56 +213,92 @@ public class TestIndexRearranger extends LuceneTestCase {
         .setIndexSort(new Sort(new SortField("ord", SortField.Type.INT)));
   }
 
-  private static void createIndex(int docNum, int segNum, Directory dir) throws IOException {
+  private static void createIndex(Directory dir) throws IOException {
     IndexWriter w = new IndexWriter(dir, getIndexWriterConfig());
-    int docPerSeg = (int) Math.ceil((double) docNum / segNum);
-    for (int i = 0; i < docNum; i++) {
+    for (int i = 0; i < 6; i++) { // docs with ord 0..5
       Document doc = new Document();
       doc.add(new NumericDocValuesField("ord", i));
-      doc.add(new BinaryDocValuesField("textOrd", new BytesRef(Integer.toString(i))));
+      if (i % 2 == 1) { // odd docs are tagged for deletion
+        doc.add(new StringField("delete", new BytesRef("yes"), Field.Store.YES));
+      }
       w.addDocument(doc);
-      if (i % docPerSeg == docPerSeg - 1) {
+      if (i % 2 == 1) { // after every second doc
+        w.deleteDocuments(new Term("delete", "yes")); // remove tagged docs, then commit
         w.commit();
       }
     }
-    IndexReader reader = DirectoryReader.open(w);
-    assertEquals(segNum, reader.leaves().size());
-    reader.close();
+
+    assertCreatedIndex(dir); // expects 3 segments, each with one live and one deleted doc
     w.close();
   }
 
-  private static class OddDocSelector implements IndexRearranger.DocumentSelector {
+  /** Checks the fixture: 3 segments, each holding a live doc followed by a deleted doc. */
+  private static void assertCreatedIndex(Directory dir) throws IOException {
+    IndexReader indexReader = DirectoryReader.open(dir);
+    assertEquals(3, indexReader.leaves().size());
+    for (int seg = 0; seg < 3; seg++) {
+      LeafReader leaf = indexReader.leaves().get(seg).reader();
+      assertEquals(1, leaf.numDocs());
+      assertEquals(2, leaf.maxDoc());
 
+      Bits live = leaf.getLiveDocs();
+      assertNotNull(live);
+      assertTrue(live.get(0));
+      assertFalse(live.get(1));
+
+      NumericDocValues ordValues = leaf.getNumericDocValues("ord");
+      assertNotNull(ordValues);
+      for (int doc = 0; doc < 2; doc++) {
+        assertTrue(ordValues.advanceExact(doc));
+        assertEquals(2 * seg + doc, ordValues.longValue()); // segment seg holds ord 2*seg, 2*seg+1
+      }
+    }
+
+    indexReader.close();
+  }
+
+  private static class LiveToLiveSelector implements IndexRearranger.DocumentSelector {
     @Override
-    public BitSet getFilteredLiveDocs(CodecReader reader) throws IOException {
+    public BitSet getFilteredDocs(CodecReader reader) throws IOException {
       FixedBitSet filteredSet = new FixedBitSet(reader.maxDoc());
-      Bits liveDocs = reader.getLiveDocs();
       NumericDocValues numericDocValues = reader.getNumericDocValues("ord");
-      for (int i = 0; i < reader.maxDoc(); i++) {
-        if (liveDocs != null && liveDocs.get(i) == false) {
-          continue;
-        }
-        if (numericDocValues.advanceExact(i) && numericDocValues.longValue() % 2 == 1) {
-          filteredSet.set(i);
+      assert numericDocValues != null;
+      for (int docid = 0; docid < reader.maxDoc(); docid++) { // liveDocs are not consulted
+        if (numericDocValues.advanceExact(docid)
+            && Arrays.asList(0, 2).contains((int) numericDocValues.longValue())) {
+          filteredSet.set(docid); // ord 0 or 2: both live in the test index
         }
       }
       return filteredSet;
     }
   }
 
-  private static class EvenDocSelector implements IndexRearranger.DocumentSelector {
-
+  private static class DeleteToLiveSelector implements IndexRearranger.DocumentSelector {
     @Override
-    public BitSet getFilteredLiveDocs(CodecReader reader) throws IOException {
+    public BitSet getFilteredDocs(CodecReader reader) throws IOException {
       FixedBitSet filteredSet = new FixedBitSet(reader.maxDoc());
-      Bits liveDocs = reader.getLiveDocs();
       NumericDocValues numericDocValues = reader.getNumericDocValues("ord");
-      for (int i = 0; i < reader.maxDoc(); i++) {
-        if (liveDocs != null && liveDocs.get(i) == false) {
-          continue;
+      assert numericDocValues != null;
+      for (int docid = 0; docid < reader.maxDoc(); docid++) { // liveDocs are not consulted
+        if (numericDocValues.advanceExact(docid)
+            && Arrays.asList(3, 5).contains((int) numericDocValues.longValue())) {
+          filteredSet.set(docid); // ord 3 or 5: both deleted in the test index
         }
-        if (numericDocValues.advanceExact(i) && numericDocValues.longValue() % 2 == 0) {
-          filteredSet.set(i);
+      }
+      return filteredSet;
+    }
+  }
+
+  private static class DeleteSelector implements IndexRearranger.DocumentSelector {
+    @Override
+    public BitSet getFilteredDocs(CodecReader reader) throws IOException {
+      FixedBitSet filteredSet = new FixedBitSet(reader.maxDoc());
+      NumericDocValues numericDocValues = reader.getNumericDocValues("ord");
+      assert numericDocValues != null;
+      for (int docid = 0; docid < reader.maxDoc(); docid++) {
+        if (numericDocValues.advanceExact(docid)
+            && Arrays.asList(1, 2, 4, 5).contains((int) numericDocValues.longValue())) {
+          filteredSet.set(docid);
         }
       }
       return filteredSet;