You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dn...@apache.org on 2021/05/16 20:47:42 UTC
[lucene-solr] branch branch_8x updated: LUCENE-9935: Enable bulk merge for stored fields with index sort

This is an automated email from the ASF dual-hosted git repository.

dnhatn pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new efdc43f  LUCENE-9935: Enable bulk merge for stored fields with index sort
efdc43f is described below

commit efdc43fee18e33c664a22df97ef6cd40912af20d
Author: Nhat Nguyen <nh...@elastic.co>
AuthorDate: Sun May 16 16:47:21 2021 -0400

    LUCENE-9935: Enable bulk merge for stored fields with index sort
    
    This commit enables bulk-merges (i.e., raw chunk copying) for stored
    fields when index sort is enabled.
---
 lucene/CHANGES.txt                                 |   1 +
 .../compressing/CompressingStoredFieldsReader.java |  18 +-
 .../compressing/CompressingStoredFieldsWriter.java | 354 +++++++++++----------
 .../index/BaseStoredFieldsFormatTestCase.java      | 129 ++++++++
 4 files changed, 332 insertions(+), 170 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 35a794b..d85f088 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -54,6 +54,7 @@ Improvements
 * LUCENE-9877: Reduce index size by increasing allowable exceptions in PForUtil from
   3 to 7. (Greg Miller)
 
+* LUCENE-9935: Enable bulk merge for stored fields with index sort. (Robert Muir, Adrien Grand, Nhat Nguyen)
 
 Optimizations
 ---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
index 03aaa39..e39d0b6 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
@@ -37,7 +37,7 @@ import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter
 import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.TYPE_BITS;
 import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.TYPE_MASK;
 import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_CURRENT;
-import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_NUMCHUNKS;
+import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_NUM_CHUNKS;
 import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_META;
 import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_OFFHEAP_INDEX;
 import static org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.VERSION_START;
@@ -189,7 +189,7 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
       this.maxPointer = maxPointer;
       this.indexReader = indexReader;
 
-      if (version >= VERSION_NUMCHUNKS) {
+      if (version >= VERSION_NUM_CHUNKS) {
         numChunks = metaIn.readVLong();
         numDirtyChunks = metaIn.readVLong();
         numDirtyDocs = metaIn.readVLong();
@@ -480,7 +480,7 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
     private void doReset(int docID) throws IOException {
       docBase = fieldsStream.readVInt();
       final int token = fieldsStream.readVInt();
-      chunkDocs = token >>> 1;
+      chunkDocs = version >= VERSION_NUM_CHUNKS ? token >>> 2 : token >>> 1;
       if (contains(docID) == false
           || docBase + chunkDocs > numDocs) {
         throw new CorruptIndexException("Corrupted: docID=" + docID
@@ -659,6 +659,18 @@ public final class CompressingStoredFieldsReader extends StoredFieldsReader {
     return state.document(docID);
   }
 
+  /** Checks if a given docID was loaded in the current block state. */
+  boolean isLoaded(int docID) {
+    if (merging == false) {
+      throw new IllegalStateException("isLoaded should only ever get called on a merge instance");
+    }
+    if (version != VERSION_CURRENT) {
+      throw new IllegalStateException(
+          "isLoaded should only ever get called when the reader is on the current version");
+    }
+    return state.contains(docID);
+  }
+
   @Override
   public void visitDocument(int docID, StoredFieldVisitor visitor)
       throws IOException {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
index fbed04d..7520b92 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
@@ -41,7 +41,6 @@ import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BitUtil;
-import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.packed.PackedInts;
@@ -77,9 +76,9 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
   static final int VERSION_OFFHEAP_INDEX = 2;
   /** Version where all metadata were moved to the meta file. */
   static final int VERSION_META = 3;
-  /** Version where numChunks is explicitly recorded in meta file */
-  static final int VERSION_NUMCHUNKS = 4;
-  static final int VERSION_CURRENT = VERSION_NUMCHUNKS;
+  /** Version where numChunks is explicitly recorded in meta file and a dirty chunk bit is recorded in each chunk */
+  static final int VERSION_NUM_CHUNKS = 4;
+  static final int VERSION_CURRENT = VERSION_NUM_CHUNKS;
   static final int META_VERSION_START = 0;
 
   private final String segment;
@@ -170,7 +169,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
     endOffsets[numBufferedDocs] = Math.toIntExact(bufferedDocs.size());
     ++numBufferedDocs;
     if (triggerFlush()) {
-      flush();
+      flush(false);
     }
   }
 
@@ -205,12 +204,14 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
     }
   }
 
-  private void writeHeader(int docBase, int numBufferedDocs, int[] numStoredFields, int[] lengths, boolean sliced) throws IOException {
+  private void writeHeader(int docBase, int numBufferedDocs, int[] numStoredFields,
+                           int[] lengths, boolean sliced, boolean dirtyChunk) throws IOException {
     final int slicedBit = sliced ? 1 : 0;
-    
+    final int dirtyBit = dirtyChunk ? 2 : 0;
+
     // save docBase and numBufferedDocs
     fieldsStream.writeVInt(docBase);
-    fieldsStream.writeVInt((numBufferedDocs) << 1 | slicedBit);
+    fieldsStream.writeVInt((numBufferedDocs << 2) | dirtyBit | slicedBit);
 
     // save numStoredFields
     saveInts(numStoredFields, numBufferedDocs, fieldsStream);
@@ -224,8 +225,13 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
         numBufferedDocs >= maxDocsPerChunk;
   }
 
-  private void flush() throws IOException {
+  private void flush(boolean force) throws IOException {
+    assert triggerFlush() != force;
     numChunks++;
+    if (force) {
+      numDirtyChunks++; // incomplete: we had to force this flush
+      numDirtyDocs += numBufferedDocs;
+    }
     indexWriter.writeIndex(numBufferedDocs, fieldsStream.getFilePointer());
 
     // transform end offsets into lengths
@@ -235,7 +241,8 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
       assert lengths[i] >= 0;
     }
     final boolean sliced = bufferedDocs.size() >= 2 * chunkSize;
-    writeHeader(docBase, numBufferedDocs, numStoredFields, lengths, sliced);
+    final boolean dirtyChunk = force;
+    writeHeader(docBase, numBufferedDocs, numStoredFields, lengths, sliced, dirtyChunk);
 
     // compress stored fields to fieldsStream
     //
@@ -472,9 +479,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
   @Override
   public void finish(FieldInfos fis, int numDocs) throws IOException {
     if (numBufferedDocs > 0) {
-      numDirtyChunks++; // incomplete: we had to force this flush
-      numDirtyDocs += numBufferedDocs;
-      flush();
+      flush(true);
     } else {
       assert bufferedDocs.size() == 0;
     }
@@ -503,164 +508,143 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
     BULK_MERGE_ENABLED = v;
   }
 
-  @Override
-  public int merge(MergeState mergeState) throws IOException {
-    int docCount = 0;
-    int numReaders = mergeState.maxDocs.length;
-    
-    MatchingReaders matching = new MatchingReaders(mergeState);
-    if (mergeState.needsIndexSort) {
-      /**
-       * If all readers are compressed and they have the same fieldinfos then we can merge the serialized document
-       * directly.
-       */
-      List<CompressingStoredFieldsMergeSub> subs = new ArrayList<>();
-      for(int i=0;i<mergeState.storedFieldsReaders.length;i++) {
-        if (matching.matchingReaders[i] &&
-            mergeState.storedFieldsReaders[i] instanceof CompressingStoredFieldsReader) {
-          CompressingStoredFieldsReader storedFieldsReader = (CompressingStoredFieldsReader) mergeState.storedFieldsReaders[i];
-          storedFieldsReader.checkIntegrity();
-          subs.add(new CompressingStoredFieldsMergeSub(storedFieldsReader, mergeState.docMaps[i], mergeState.maxDocs[i]));
-        } else {
-          return super.merge(mergeState);
-        }
-      }
+  private void copyOneDoc(CompressingStoredFieldsReader reader, int docID)
+      throws IOException {
+    assert reader.getVersion() == VERSION_CURRENT;
+    SerializedDocument doc = reader.document(docID);
+    startDocument();
+    bufferedDocs.copyBytes(doc.in, doc.length);
+    numStoredFieldsInDoc = doc.numStoredFields;
+    finishDocument();
+  }
 
-      final DocIDMerger<CompressingStoredFieldsMergeSub> docIDMerger =
-          DocIDMerger.of(subs, true);
-      while (true) {
-        CompressingStoredFieldsMergeSub sub = docIDMerger.next();
-        if (sub == null) {
-          break;
-        }
-        assert sub.mappedDocID == docCount;
-        SerializedDocument doc = sub.reader.document(sub.docID);
-        startDocument();
-        bufferedDocs.copyBytes(doc.in, doc.length);
-        numStoredFieldsInDoc = doc.numStoredFields;
-        finishDocument();
-        ++docCount;
-      }
-      finish(mergeState.mergeFieldInfos, docCount);
-      return docCount;
+  private void copyChunks(
+      final MergeState mergeState,
+      final CompressingStoredFieldsMergeSub sub,
+      final int fromDocID,
+      final int toDocID)
+      throws IOException {
+    final CompressingStoredFieldsReader reader =
+        (CompressingStoredFieldsReader) mergeState.storedFieldsReaders[sub.readerIndex];
+    assert reader.getVersion() == VERSION_CURRENT;
+    assert reader.getChunkSize() == chunkSize;
+    assert reader.getCompressionMode() == compressionMode;
+    assert !tooDirty(reader);
+    assert mergeState.liveDocs[sub.readerIndex] == null;
+
+    int docID = fromDocID;
+    final FieldsIndex index = reader.getIndexReader();
+
+    // copy docs that belong to the previous chunk
+    while (docID < toDocID && reader.isLoaded(docID)) {
+      copyOneDoc(reader, docID++);
     }
-    
-    for (int readerIndex=0;readerIndex<numReaders;readerIndex++) {
-      MergeVisitor visitor = new MergeVisitor(mergeState, readerIndex);
-      CompressingStoredFieldsReader matchingFieldsReader = null;
-      if (matching.matchingReaders[readerIndex]) {
-        final StoredFieldsReader fieldsReader = mergeState.storedFieldsReaders[readerIndex];
-        // we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader
-        if (fieldsReader != null && fieldsReader instanceof CompressingStoredFieldsReader) {
-          matchingFieldsReader = (CompressingStoredFieldsReader) fieldsReader;
-        }
+    if (docID >= toDocID) {
+      return;
+    }
+    // copy chunks
+    long fromPointer = index.getStartPointer(docID);
+    final long toPointer =
+        toDocID == sub.maxDoc ? reader.getMaxPointer() : index.getStartPointer(toDocID);
+    if (fromPointer < toPointer) {
+      if (numBufferedDocs > 0) {
+        flush(true);
       }
-
-      final int maxDoc = mergeState.maxDocs[readerIndex];
-      final Bits liveDocs = mergeState.liveDocs[readerIndex];
-
-      // if its some other format, or an older version of this format, or safety switch:
-      if (matchingFieldsReader == null || matchingFieldsReader.getVersion() != VERSION_CURRENT || BULK_MERGE_ENABLED == false) {
-        // naive merge...
-        StoredFieldsReader storedFieldsReader = mergeState.storedFieldsReaders[readerIndex];
-        if (storedFieldsReader != null) {
-          storedFieldsReader.checkIntegrity();
+      final IndexInput rawDocs = reader.getFieldsStream();
+      rawDocs.seek(fromPointer);
+      do {
+        final int base = rawDocs.readVInt();
+        final int code = rawDocs.readVInt();
+        final int bufferedDocs = code >>> 2;
+        if (base != docID) {
+          throw new CorruptIndexException(
+              "invalid state: base=" + base + ", docID=" + docID, rawDocs);
         }
-        for (int docID = 0; docID < maxDoc; docID++) {
-          if (liveDocs != null && liveDocs.get(docID) == false) {
-            continue;
-          }
-          startDocument();
-          storedFieldsReader.visitDocument(docID, visitor);
-          finishDocument();
-          ++docCount;
+        // write a new index entry and new header for this chunk.
+        indexWriter.writeIndex(bufferedDocs, fieldsStream.getFilePointer());
+        fieldsStream.writeVInt(docBase); // rebase
+        fieldsStream.writeVInt(code);
+        docID += bufferedDocs;
+        docBase += bufferedDocs;
+        if (docID > toDocID) {
+          throw new CorruptIndexException(
+              "invalid state: base=" + base + ", count=" + bufferedDocs + ", toDocID=" + toDocID,
+              rawDocs);
         }
-      } else if (matchingFieldsReader.getCompressionMode() == compressionMode && 
-                 matchingFieldsReader.getChunkSize() == chunkSize && 
-                 matchingFieldsReader.getPackedIntsVersion() == PackedInts.VERSION_CURRENT &&
-                 liveDocs == null &&
-                 !tooDirty(matchingFieldsReader)) { 
-        // optimized merge, raw byte copy
-        // its not worth fine-graining this if there are deletions.
-        
-        // if the format is older, its always handled by the naive merge case above
-        assert matchingFieldsReader.getVersion() == VERSION_CURRENT;        
-        matchingFieldsReader.checkIntegrity();
-        
-        // flush any pending chunks
-        if (numBufferedDocs > 0) {
-          numDirtyChunks++; // incomplete: we had to force this flush
-          numDirtyDocs += numBufferedDocs;
-          flush();
+        // copy bytes until the next chunk boundary (or end of chunk data).
+        // using the stored fields index for this isn't the most efficient, but fast enough
+        // and is a source of redundancy for detecting bad things.
+        final long endChunkPointer;
+        if (docID == sub.maxDoc) {
+          endChunkPointer = reader.getMaxPointer();
+        } else {
+          endChunkPointer = index.getStartPointer(docID);
         }
-        
-        // iterate over each chunk. we use the stored fields index to find chunk boundaries,
-        // read the docstart + doccount from the chunk header (we write a new header, since doc numbers will change),
-        // and just copy the bytes directly.
-        IndexInput rawDocs = matchingFieldsReader.getFieldsStream();
-        FieldsIndex index = matchingFieldsReader.getIndexReader();
-        rawDocs.seek(index.getStartPointer(0));
-        int docID = 0;
-        while (docID < maxDoc) {
-          // read header
-          int base = rawDocs.readVInt();
-          if (base != docID) {
-            throw new CorruptIndexException("invalid state: base=" + base + ", docID=" + docID, rawDocs);
-          }
-          int code = rawDocs.readVInt();
-          
-          // write a new index entry and new header for this chunk.
-          int bufferedDocs = code >>> 1;
-          indexWriter.writeIndex(bufferedDocs, fieldsStream.getFilePointer());
-          fieldsStream.writeVInt(docBase); // rebase
-          fieldsStream.writeVInt(code);
-          docID += bufferedDocs;
-          docBase += bufferedDocs;
-          docCount += bufferedDocs;
-          
-          if (docID > maxDoc) {
-            throw new CorruptIndexException("invalid state: base=" + base + ", count=" + bufferedDocs + ", maxDoc=" + maxDoc, rawDocs);
-          }
-          
-          // copy bytes until the next chunk boundary (or end of chunk data).
-          // using the stored fields index for this isn't the most efficient, but fast enough
-          // and is a source of redundancy for detecting bad things.
-          final long end;
-          if (docID == maxDoc) {
-            end = matchingFieldsReader.getMaxPointer();
-          } else {
-            end = index.getStartPointer(docID);
-          }
-          fieldsStream.copyBytes(rawDocs, end - rawDocs.getFilePointer());
+        fieldsStream.copyBytes(rawDocs, endChunkPointer - rawDocs.getFilePointer());
+        ++numChunks;
+        final boolean dirtyChunk = (code & 2) != 0;
+        if (dirtyChunk) {
+          assert bufferedDocs < maxDocsPerChunk;
+          ++numDirtyChunks;
+          numDirtyDocs += bufferedDocs;
         }
-               
-        if (rawDocs.getFilePointer() != matchingFieldsReader.getMaxPointer()) {
-          throw new CorruptIndexException("invalid state: pos=" + rawDocs.getFilePointer() + ", max=" + matchingFieldsReader.getMaxPointer(), rawDocs);
+        fromPointer = endChunkPointer;
+      } while (fromPointer < toPointer);
+    }
+
+    // copy leftover docs that don't form a complete chunk
+    assert reader.isLoaded(docID) == false;
+    while (docID < toDocID) {
+      copyOneDoc(reader, docID++);
+    }
+  }
+
+  @Override
+  public int merge(MergeState mergeState) throws IOException {
+    final MatchingReaders matchingReaders = new MatchingReaders(mergeState);
+    final MergeVisitor[] visitors = new MergeVisitor[mergeState.storedFieldsReaders.length];
+    final List<CompressingStoredFieldsMergeSub> subs =
+        new ArrayList<>(mergeState.storedFieldsReaders.length);
+    for (int i = 0; i < mergeState.storedFieldsReaders.length; i++) {
+      final StoredFieldsReader reader = mergeState.storedFieldsReaders[i];
+      reader.checkIntegrity();
+      MergeStrategy mergeStrategy = getMergeStrategy(mergeState, matchingReaders, i);
+      if (mergeStrategy == MergeStrategy.VISITOR) {
+        visitors[i] = new MergeVisitor(mergeState, i);
+      }
+      subs.add(new CompressingStoredFieldsMergeSub(mergeState, mergeStrategy, i));
+    }
+    int docCount = 0;
+    final DocIDMerger<CompressingStoredFieldsMergeSub> docIDMerger =
+        DocIDMerger.of(subs, mergeState.needsIndexSort);
+    CompressingStoredFieldsMergeSub sub = docIDMerger.next();
+    while (sub != null) {
+      assert sub.mappedDocID == docCount : sub.mappedDocID + " != " + docCount;
+      final StoredFieldsReader reader = mergeState.storedFieldsReaders[sub.readerIndex];
+      if (sub.mergeStrategy == MergeStrategy.BULK) {
+        final int fromDocID = sub.docID;
+        int toDocID = fromDocID;
+        final CompressingStoredFieldsMergeSub current = sub;
+        while ((sub = docIDMerger.next()) == current) {
+          ++toDocID;
+          assert sub.docID == toDocID;
         }
-        
-        // since we bulk merged all chunks, we inherit any dirty ones from this segment.
-        numChunks += matchingFieldsReader.getNumChunks();
-        numDirtyChunks += matchingFieldsReader.getNumDirtyChunks();
-        numDirtyDocs += matchingFieldsReader.getNumDirtyDocs();
+        ++toDocID; // exclusive bound
+        copyChunks(mergeState, current, fromDocID, toDocID);
+        docCount += (toDocID - fromDocID);
+      } else if (sub.mergeStrategy == MergeStrategy.DOC) {
+        copyOneDoc((CompressingStoredFieldsReader) reader, sub.docID);
+        ++docCount;
+        sub = docIDMerger.next();
+      } else if (sub.mergeStrategy == MergeStrategy.VISITOR) {
+        assert visitors[sub.readerIndex] != null;
+        startDocument();
+        reader.visitDocument(sub.docID, visitors[sub.readerIndex]);
+        finishDocument();
+        ++docCount;
+        sub = docIDMerger.next();
       } else {
-        // optimized merge, we copy serialized (but decompressed) bytes directly
-        // even on simple docs (1 stored field), it seems to help by about 20%
-        
-        // if the format is older, its always handled by the naive merge case above
-        assert matchingFieldsReader.getVersion() == VERSION_CURRENT;
-        matchingFieldsReader.checkIntegrity();
-
-        for (int docID = 0; docID < maxDoc; docID++) {
-          if (liveDocs != null && liveDocs.get(docID) == false) {
-            continue;
-          }
-          SerializedDocument doc = matchingFieldsReader.document(docID);
-          startDocument();
-          bufferedDocs.copyBytes(doc.in, doc.length);
-          numStoredFieldsInDoc = doc.numStoredFields;
-          finishDocument();
-          ++docCount;
-        }
+        throw new AssertionError("Unknown merge strategy [" + sub.mergeStrategy + "]");
       }
     }
     finish(mergeState.mergeFieldInfos, docCount);
@@ -681,15 +665,51 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
         && candidate.getNumDirtyChunks() * 100 > candidate.getNumChunks();
   }
 
+  private enum MergeStrategy {
+    /** Copy chunk by chunk in a compressed format */
+    BULK,
+
+    /** Copy document by document in a decompressed format */
+    DOC,
+
+    /** Copy field by field of decompressed documents */
+    VISITOR
+  }
+
+  private MergeStrategy getMergeStrategy(
+      MergeState mergeState, MatchingReaders matchingReaders, int readerIndex) {
+    final StoredFieldsReader candidate = mergeState.storedFieldsReaders[readerIndex];
+    if (matchingReaders.matchingReaders[readerIndex] == false
+        || candidate instanceof CompressingStoredFieldsReader == false
+        || ((CompressingStoredFieldsReader) candidate).getVersion() != VERSION_CURRENT) {
+      return MergeStrategy.VISITOR;
+    }
+    CompressingStoredFieldsReader reader = (CompressingStoredFieldsReader) candidate;
+    if (BULK_MERGE_ENABLED
+        && reader.getCompressionMode() == compressionMode
+        && reader.getChunkSize() == chunkSize
+        && reader.getPackedIntsVersion() == PackedInts.VERSION_CURRENT
+        // its not worth fine-graining this if there are deletions.
+        && mergeState.liveDocs[readerIndex] == null
+        && !tooDirty(reader)) {
+      return MergeStrategy.BULK;
+    } else {
+      return MergeStrategy.DOC;
+    }
+  }
+
   private static class CompressingStoredFieldsMergeSub extends DocIDMerger.Sub {
-    private final CompressingStoredFieldsReader reader;
+    private final int readerIndex;
     private final int maxDoc;
+    private final MergeStrategy mergeStrategy;
     int docID = -1;
 
-    CompressingStoredFieldsMergeSub(CompressingStoredFieldsReader reader, MergeState.DocMap docMap, int maxDoc) {
-      super(docMap);
-      this.maxDoc = maxDoc;
-      this.reader = reader;
+    CompressingStoredFieldsMergeSub(
+        MergeState mergeState, MergeStrategy mergeStrategy, int readerIndex) {
+      super(mergeState.docMaps[readerIndex]);
+      this.readerIndex = readerIndex;
+      this.mergeStrategy = mergeStrategy;
+      this.maxDoc = mergeState.maxDocs[readerIndex];
     }
 
     @Override
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java
index 0c975cf..4713612 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseStoredFieldsFormatTestCase.java
@@ -17,6 +17,7 @@
 package org.apache.lucene.index;
 
 import java.io.IOException;
+import java.io.UncheckedIOException;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -27,6 +28,8 @@ import java.util.List;
 import java.util.Map;
 import java.util.Random;
 import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.Function;
+import java.util.stream.Collectors;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
@@ -43,6 +46,8 @@ import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
@@ -819,4 +824,128 @@ public abstract class BaseStoredFieldsFormatTestCase extends BaseIndexFileFormat
     IOUtils.close(iw, ir, everything);
     IOUtils.close(dirs);
   }
+
+  public void testRandomStoredFieldsWithIndexSort() throws Exception {
+    final SortField[] sortFields;
+    if (random().nextBoolean()) {
+      sortFields =
+          new SortField[]{
+              new SortField("sort-1", SortField.Type.LONG),
+              new SortField("sort-2", SortField.Type.INT)
+          };
+    } else {
+      sortFields = new SortField[]{new SortField("sort-1", SortField.Type.LONG)};
+    }
+    List<String> storedFields = new ArrayList<>();
+    int numFields = TestUtil.nextInt(random(), 1, 10);
+    for (int i = 0; i < numFields; i++) {
+      storedFields.add("f-" + i);
+    }
+    FieldType storeType = new FieldType(TextField.TYPE_STORED);
+    storeType.setStored(true);
+    Function<String, Document> documentFactory =
+        id -> {
+          Document doc = new Document();
+          doc.add(new StringField("id", id, random().nextBoolean() ? Store.YES : Store.NO));
+          if (random().nextInt(100) <= 5) {
+            Collections.shuffle(storedFields, random());
+          }
+          for (String fieldName : storedFields) {
+            if (random().nextBoolean()) {
+              String s = TestUtil.randomUnicodeString(random(), 100);
+              doc.add(newField(fieldName, s, storeType));
+            }
+          }
+          for (SortField sortField : sortFields) {
+            doc.add(
+                new NumericDocValuesField(
+                    sortField.getField(), TestUtil.nextInt(random(), 0, 10000)));
+          }
+          return doc;
+        };
+
+    Map<String, Document> docs = new HashMap<>();
+    int numDocs = atLeast(100);
+    for (int i = 0; i < numDocs; i++) {
+      String id = Integer.toString(i);
+      docs.put(id, documentFactory.apply(id));
+    }
+
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig();
+    iwc.setMaxBufferedDocs(TestUtil.nextInt(random(), 5, 20));
+    iwc.setIndexSort(new Sort(sortFields));
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    List<String> addedIds = new ArrayList<>();
+    Runnable verifyStoreFields =
+        () -> {
+          try (DirectoryReader reader = maybeWrapWithMergingReader(iw.getReader())) {
+            IndexSearcher searcher = new IndexSearcher(reader);
+            int iters = TestUtil.nextInt(random(), 1, 10);
+            for (int i = 0; i < iters; i++) {
+              String testID = addedIds.get(random().nextInt(addedIds.size()));
+              if (VERBOSE) {
+                System.out.println("TEST: test id=" + testID);
+              }
+              TopDocs hits = searcher.search(new TermQuery(new Term("id", testID)), 1);
+              assertEquals(1, hits.totalHits.value);
+              List<IndexableField> expectedFields =
+                  docs.get(testID).getFields().stream()
+                      .filter(f -> f.fieldType().stored())
+                      .collect(Collectors.toList());
+              Document actualDoc = reader.document(hits.scoreDocs[0].doc);
+              assertEquals(expectedFields.size(), actualDoc.getFields().size());
+              for (IndexableField expectedField : expectedFields) {
+                IndexableField[] actualFields = actualDoc.getFields(expectedField.name());
+                assertEquals(1, actualFields.length);
+                assertEquals(expectedField.stringValue(), actualFields[0].stringValue());
+              }
+            }
+          } catch (IOException e) {
+            throw new UncheckedIOException(e);
+          }
+        };
+    final List<String> ids = new ArrayList<>(docs.keySet());
+    Collections.shuffle(ids, random());
+    for (String id : ids) {
+      if (random().nextInt(100) < 5) {
+        // add via foreign reader
+        IndexWriterConfig otherIwc = newIndexWriterConfig();
+        otherIwc.setIndexSort(new Sort(sortFields));
+        try (Directory otherDir = newDirectory();
+             RandomIndexWriter otherIw = new RandomIndexWriter(random(), otherDir, otherIwc)) {
+          otherIw.addDocument(docs.get(id));
+          try (DirectoryReader otherReader = otherIw.getReader()) {
+            TestUtil.addIndexesSlowly(iw.w, otherReader);
+          }
+        }
+      } else {
+        // add normally
+        iw.addDocument(docs.get(id));
+      }
+      addedIds.add(id);
+      if (random().nextInt(100) < 5) {
+        String deletingId = addedIds.remove(random().nextInt(addedIds.size()));
+        if (random().nextBoolean()) {
+          iw.deleteDocuments(new TermQuery(new Term("id", deletingId)));
+          addedIds.remove(deletingId);
+        } else {
+          final Document newDoc = documentFactory.apply(deletingId);
+          docs.put(deletingId, newDoc);
+          iw.updateDocument(new Term("id", deletingId), newDoc);
+        }
+      }
+      if (random().nextInt(100) < 5) {
+        verifyStoreFields.run();
+      }
+      if (random().nextInt(100) < 2) {
+        iw.forceMerge(TestUtil.nextInt(random(), 1, 3));
+      }
+    }
+    verifyStoreFields.run();
+    iw.forceMerge(TestUtil.nextInt(random(), 1, 3));
+    verifyStoreFields.run();
+    IOUtils.close(iw, dir);
+  }
+
 }