Posted to commits@lucene.apache.org by dn...@apache.org on 2021/06/10 15:54:24 UTC

[lucene] branch main updated: Revert "LUCENE-9935: Enable bulk-merge for term vectors with index sort (#140)"

This is an automated email from the ASF dual-hosted git repository.

dnhatn pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 69ab144  Revert "LUCENE-9935: Enable bulk-merge for term vectors with index sort (#140)"
69ab144 is described below

commit 69ab1447a7dfe12ab81e0b11185950451d1fd807
Author: Nhat Nguyen <nh...@elastic.co>
AuthorDate: Thu Jun 10 11:54:11 2021 -0400

    Revert "LUCENE-9935: Enable bulk-merge for term vectors with index sort (#140)"
    
    This reverts commit 54fb21e862c2041cb907517ed993c8ece898cb26.
---
 .../Lucene90CompressingTermVectorsReader.java      |  29 +--
 .../Lucene90CompressingTermVectorsWriter.java      | 284 +++++++++------------
 .../index/BaseTermVectorsFormatTestCase.java       | 107 ++------
 3 files changed, 145 insertions(+), 275 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java
index 802eb1c..ec8823d 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java
@@ -91,7 +91,6 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
   private final long numDirtyChunks; // number of incomplete compressed blocks written
   private final long numDirtyDocs; // cumulative number of docs in incomplete chunks
   private final long maxPointer; // end of the data section
-  private BlockState blockState = new BlockState(-1, -1, 0);
 
   // used by clone
   private Lucene90CompressingTermVectorsReader(Lucene90CompressingTermVectorsReader reader) {
@@ -311,45 +310,25 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
     return new ByteBuffersDataInput(Collections.singletonList(ByteBuffer.wrap(bytes)));
   }
 
-  /** Checks if a given docID was loaded in the current block state. */
-  boolean isLoaded(int docID) {
-    return blockState.docBase <= docID && docID < blockState.docBase + blockState.chunkDocs;
-  }
-
-  private static class BlockState {
-    final long startPointer;
-    final int docBase;
-    final int chunkDocs;
-
-    BlockState(long startPointer, int docBase, int chunkDocs) {
-      this.startPointer = startPointer;
-      this.docBase = docBase;
-      this.chunkDocs = chunkDocs;
-    }
-  }
-
   @Override
   public Fields get(int doc) throws IOException {
     ensureOpen();
 
     // seek to the right place
-    final long startPointer;
-    if (isLoaded(doc)) {
-      startPointer = blockState.startPointer; // avoid searching the start pointer
-    } else {
-      startPointer = indexReader.getStartPointer(doc);
+    {
+      final long startPointer = indexReader.getStartPointer(doc);
+      vectorsStream.seek(startPointer);
     }
 
     // decode
     // - docBase: first doc ID of the chunk
     // - chunkDocs: number of docs of the chunk
     final int docBase = vectorsStream.readVInt();
-    final int chunkDocs = vectorsStream.readVInt() >>> 1;
+    final int chunkDocs = vectorsStream.readVInt();
     if (doc < docBase || doc >= docBase + chunkDocs || docBase + chunkDocs > numDocs) {
       throw new CorruptIndexException(
           "docBase=" + docBase + ",chunkDocs=" + chunkDocs + ",doc=" + doc, vectorsStream);
     }
-    this.blockState = new BlockState(startPointer, docBase, chunkDocs);
 
     final int skip; // number of fields to skip
     final int numFields; // number of fields of the document we're looking for
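
For context, the reader-side hunks above remove a small cache of the most recently decoded chunk's bounds: with that cache, consecutive lookups that fall inside the same chunk can reuse the cached start pointer instead of searching the vectors index again. A minimal sketch of the idea follows; the class and method names are hypothetical illustrations, not the actual Lucene types.

    // Hypothetical sketch of the chunk-bounds caching idea removed above.
    final class ChunkBounds {
      final long startPointer; // file offset where the chunk starts
      final int docBase;       // first docID stored in the chunk
      final int chunkDocs;     // number of docs stored in the chunk

      ChunkBounds(long startPointer, int docBase, int chunkDocs) {
        this.startPointer = startPointer;
        this.docBase = docBase;
        this.chunkDocs = chunkDocs;
      }

      // True if docID falls inside this chunk, so its start pointer is already known
      // and the index lookup can be skipped.
      boolean contains(int docID) {
        return docBase <= docID && docID < docBase + chunkDocs;
      }
    }
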
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java
index 638b9e7..ed54ce7 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsWriter.java
@@ -16,11 +16,8 @@
  */
 package org.apache.lucene.codecs.lucene90.compressing;
 
-import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
-
 import java.io.IOException;
 import java.util.ArrayDeque;
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Deque;
@@ -35,7 +32,6 @@ import org.apache.lucene.codecs.compressing.CompressionMode;
 import org.apache.lucene.codecs.compressing.Compressor;
 import org.apache.lucene.codecs.compressing.MatchingReaders;
 import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.DocIDMerger;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.Fields;
@@ -50,6 +46,7 @@ import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.StringHelper;
@@ -328,7 +325,7 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
     payloadBytes.reset();
     ++numDocs;
     if (triggerFlush()) {
-      flush(false);
+      flush();
     }
     curDoc = null;
   }
@@ -382,22 +379,17 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
     return termSuffixes.size() >= chunkSize || pendingDocs.size() >= maxDocsPerChunk;
   }
 
-  private void flush(boolean force) throws IOException {
-    assert force != triggerFlush();
+  private void flush() throws IOException {
+    numChunks++;
     final int chunkDocs = pendingDocs.size();
     assert chunkDocs > 0 : chunkDocs;
-    numChunks++;
-    if (force) {
-      numDirtyChunks++; // incomplete: we had to force this flush
-      numDirtyDocs += pendingDocs.size();
-    }
+
     // write the index file
     indexWriter.writeIndex(chunkDocs, vectorsStream.getFilePointer());
 
     final int docBase = numDocs - chunkDocs;
     vectorsStream.writeVInt(docBase);
-    final int dirtyBit = force ? 1 : 0;
-    vectorsStream.writeVInt((chunkDocs << 1) | dirtyBit);
+    vectorsStream.writeVInt(chunkDocs);
 
     // total number of fields of the chunk
     final int totalFields = flushNumFields(chunkDocs);
@@ -723,7 +715,9 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
   @Override
   public void finish(FieldInfos fis, int numDocs) throws IOException {
     if (!pendingDocs.isEmpty()) {
-      flush(true);
+      numDirtyChunks++; // incomplete: we had to force this flush
+      numDirtyDocs += pendingDocs.size();
+      flush();
     }
     if (numDocs != this.numDocs) {
       throw new RuntimeException(
@@ -812,131 +806,127 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
     BULK_MERGE_ENABLED = v;
   }
 
-  private void copyChunks(
-      final MergeState mergeState,
-      final CompressingTermVectorsSub sub,
-      final int fromDocID,
-      final int toDocID)
-      throws IOException {
-    final Lucene90CompressingTermVectorsReader reader =
-        (Lucene90CompressingTermVectorsReader) mergeState.termVectorsReaders[sub.readerIndex];
-    assert reader.getVersion() == VERSION_CURRENT;
-    assert reader.getChunkSize() == chunkSize;
-    assert reader.getCompressionMode() == compressionMode;
-    assert !tooDirty(reader);
-    assert mergeState.liveDocs[sub.readerIndex] == null;
-
-    int docID = fromDocID;
-    final FieldsIndex index = reader.getIndexReader();
-
-    // copy docs that belong to the previous chunk
-    while (docID < toDocID && reader.isLoaded(docID)) {
-      addAllDocVectors(reader.get(docID++), mergeState);
-    }
-
-    if (docID >= toDocID) {
-      return;
+  @Override
+  public int merge(MergeState mergeState) throws IOException {
+    if (mergeState.needsIndexSort) {
+      // TODO: can we gain back some optos even if index is sorted?  E.g. if sort results in large
+      // chunks of contiguous docs from one sub
+      // being copied over...?
+      return super.merge(mergeState);
     }
-    // copy chunks
-    long fromPointer = index.getStartPointer(docID);
-    final long toPointer =
-        toDocID == sub.maxDoc ? reader.getMaxPointer() : index.getStartPointer(toDocID);
-    if (fromPointer < toPointer) {
-      // flush any pending chunks
-      if (!pendingDocs.isEmpty()) {
-        flush(true);
+    int docCount = 0;
+    int numReaders = mergeState.maxDocs.length;
+
+    MatchingReaders matching = new MatchingReaders(mergeState);
+
+    for (int readerIndex = 0; readerIndex < numReaders; readerIndex++) {
+      Lucene90CompressingTermVectorsReader matchingVectorsReader = null;
+      final TermVectorsReader vectorsReader = mergeState.termVectorsReaders[readerIndex];
+      if (matching.matchingReaders[readerIndex]) {
+        // we can only bulk-copy if the matching reader is also a CompressingTermVectorsReader
+        if (vectorsReader != null
+            && vectorsReader instanceof Lucene90CompressingTermVectorsReader) {
+          matchingVectorsReader = (Lucene90CompressingTermVectorsReader) vectorsReader;
+        }
       }
-      final IndexInput rawDocs = reader.getVectorsStream();
-      rawDocs.seek(fromPointer);
-      do {
+
+      final int maxDoc = mergeState.maxDocs[readerIndex];
+      final Bits liveDocs = mergeState.liveDocs[readerIndex];
+
+      if (matchingVectorsReader != null
+          && matchingVectorsReader.getCompressionMode() == compressionMode
+          && matchingVectorsReader.getChunkSize() == chunkSize
+          && matchingVectorsReader.getVersion() == VERSION_CURRENT
+          && matchingVectorsReader.getPackedIntsVersion() == PackedInts.VERSION_CURRENT
+          && BULK_MERGE_ENABLED
+          && liveDocs == null
+          && !tooDirty(matchingVectorsReader)) {
+        // optimized merge, raw byte copy
+        // its not worth fine-graining this if there are deletions.
+
+        matchingVectorsReader.checkIntegrity();
+
+        // flush any pending chunks
+        if (!pendingDocs.isEmpty()) {
+          numDirtyChunks++; // incomplete: we had to force this flush
+          numDirtyDocs += pendingDocs.size();
+          flush();
+        }
+
         // iterate over each chunk. we use the vectors index to find chunk boundaries,
         // read the docstart + doccount from the chunk header (we write a new header, since doc
         // numbers will change),
         // and just copy the bytes directly.
-        // read header
-        final int base = rawDocs.readVInt();
-        if (base != docID) {
-          throw new CorruptIndexException(
-              "invalid state: base=" + base + ", docID=" + docID, rawDocs);
-        }
+        IndexInput rawDocs = matchingVectorsReader.getVectorsStream();
+        FieldsIndex index = matchingVectorsReader.getIndexReader();
+        rawDocs.seek(index.getStartPointer(0));
+        int docID = 0;
+        while (docID < maxDoc) {
+          // read header
+          int base = rawDocs.readVInt();
+          if (base != docID) {
+            throw new CorruptIndexException(
+                "invalid state: base=" + base + ", docID=" + docID, rawDocs);
+          }
+          int bufferedDocs = rawDocs.readVInt();
+
+          // write a new index entry and new header for this chunk.
+          indexWriter.writeIndex(bufferedDocs, vectorsStream.getFilePointer());
+          vectorsStream.writeVInt(docCount); // rebase
+          vectorsStream.writeVInt(bufferedDocs);
+          docID += bufferedDocs;
+          docCount += bufferedDocs;
+          numDocs += bufferedDocs;
+
+          if (docID > maxDoc) {
+            throw new CorruptIndexException(
+                "invalid state: base=" + base + ", count=" + bufferedDocs + ", maxDoc=" + maxDoc,
+                rawDocs);
+          }
 
-        final int code = rawDocs.readVInt();
-        final int bufferedDocs = code >>> 1;
+          // copy bytes until the next chunk boundary (or end of chunk data).
+          // using the stored fields index for this isn't the most efficient, but fast enough
+          // and is a source of redundancy for detecting bad things.
+          final long end;
+          if (docID == maxDoc) {
+            end = matchingVectorsReader.getMaxPointer();
+          } else {
+            end = index.getStartPointer(docID);
+          }
+          vectorsStream.copyBytes(rawDocs, end - rawDocs.getFilePointer());
+        }
 
-        // write a new index entry and new header for this chunk.
-        indexWriter.writeIndex(bufferedDocs, vectorsStream.getFilePointer());
-        vectorsStream.writeVInt(numDocs); // rebase
-        vectorsStream.writeVInt(code);
-        docID += bufferedDocs;
-        numDocs += bufferedDocs;
-        if (docID > toDocID) {
+        if (rawDocs.getFilePointer() != matchingVectorsReader.getMaxPointer()) {
           throw new CorruptIndexException(
-              "invalid state: base=" + base + ", count=" + bufferedDocs + ", toDocID=" + toDocID,
+              "invalid state: pos="
+                  + rawDocs.getFilePointer()
+                  + ", max="
+                  + matchingVectorsReader.getMaxPointer(),
               rawDocs);
         }
 
-        // copy bytes until the next chunk boundary (or end of chunk data).
-        // using the stored fields index for this isn't the most efficient, but fast enough
-        // and is a source of redundancy for detecting bad things.
-        final long end;
-        if (docID == sub.maxDoc) {
-          end = reader.getMaxPointer();
-        } else {
-          end = index.getStartPointer(docID);
-        }
-        vectorsStream.copyBytes(rawDocs, end - rawDocs.getFilePointer());
-        ++numChunks;
-        boolean dirtyChunk = (code & 1) != 0;
-        if (dirtyChunk) {
-          numDirtyChunks++;
-          numDirtyDocs += bufferedDocs;
+        // since we bulk merged all chunks, we inherit any dirty ones from this segment.
+        numChunks += matchingVectorsReader.getNumChunks();
+        numDirtyChunks += matchingVectorsReader.getNumDirtyChunks();
+        numDirtyDocs += matchingVectorsReader.getNumDirtyDocs();
+      } else {
+        // naive merge...
+        if (vectorsReader != null) {
+          vectorsReader.checkIntegrity();
         }
-        fromPointer = end;
-      } while (fromPointer < toPointer);
-    }
-    // copy leftover docs that don't form a complete chunk
-    assert reader.isLoaded(docID) == false;
-    while (docID < toDocID) {
-      addAllDocVectors(reader.get(docID++), mergeState);
-    }
-  }
-
-  @Override
-  public int merge(MergeState mergeState) throws IOException {
-    final int numReaders = mergeState.termVectorsReaders.length;
-    final MatchingReaders matchingReaders = new MatchingReaders(mergeState);
-    final List<CompressingTermVectorsSub> subs = new ArrayList<>(numReaders);
-    for (int i = 0; i < numReaders; i++) {
-      final TermVectorsReader reader = mergeState.termVectorsReaders[i];
-      if (reader != null) {
-        reader.checkIntegrity();
-      }
-      final boolean bulkMerge = canPerformBulkMerge(mergeState, matchingReaders, i);
-      subs.add(new CompressingTermVectorsSub(mergeState, bulkMerge, i));
-    }
-    int docCount = 0;
-    final DocIDMerger<CompressingTermVectorsSub> docIDMerger =
-        DocIDMerger.of(subs, mergeState.needsIndexSort);
-    CompressingTermVectorsSub sub = docIDMerger.next();
-    while (sub != null) {
-      assert sub.mappedDocID == docCount : sub.mappedDocID + " != " + docCount;
-      if (sub.canPerformBulkMerge) {
-        final int fromDocID = sub.docID;
-        int toDocID = fromDocID;
-        final CompressingTermVectorsSub current = sub;
-        while ((sub = docIDMerger.next()) == current) {
-          ++toDocID;
-          assert sub.docID == toDocID;
+        for (int i = 0; i < maxDoc; i++) {
+          if (liveDocs != null && liveDocs.get(i) == false) {
+            continue;
+          }
+          Fields vectors;
+          if (vectorsReader == null) {
+            vectors = null;
+          } else {
+            vectors = vectorsReader.get(i);
+          }
+          addAllDocVectors(vectors, mergeState);
+          ++docCount;
         }
-        ++toDocID; // exclusive bound
-        copyChunks(mergeState, current, fromDocID, toDocID);
-        docCount += toDocID - fromDocID;
-      } else {
-        final TermVectorsReader reader = mergeState.termVectorsReaders[sub.readerIndex];
-        final Fields vectors = reader != null ? reader.get(sub.docID) : null;
-        addAllDocVectors(vectors, mergeState);
-        ++docCount;
-        sub = docIDMerger.next();
       }
     }
     finish(mergeState.mergeFieldInfos, docCount);
@@ -958,48 +948,6 @@ public final class Lucene90CompressingTermVectorsWriter extends TermVectorsWrite
         && candidate.getNumDirtyChunks() * 100 > candidate.getNumChunks();
   }
 
-  private boolean canPerformBulkMerge(
-      MergeState mergeState, MatchingReaders matchingReaders, int readerIndex) {
-    if (mergeState.termVectorsReaders[readerIndex]
-        instanceof Lucene90CompressingTermVectorsReader) {
-      final Lucene90CompressingTermVectorsReader reader =
-          (Lucene90CompressingTermVectorsReader) mergeState.termVectorsReaders[readerIndex];
-      return BULK_MERGE_ENABLED
-          && matchingReaders.matchingReaders[readerIndex]
-          && reader.getCompressionMode() == compressionMode
-          && reader.getChunkSize() == chunkSize
-          && reader.getVersion() == VERSION_CURRENT
-          && reader.getPackedIntsVersion() == PackedInts.VERSION_CURRENT
-          && mergeState.liveDocs[readerIndex] == null
-          && !tooDirty(reader);
-    }
-    return false;
-  }
-
-  private static class CompressingTermVectorsSub extends DocIDMerger.Sub {
-    final int maxDoc;
-    final int readerIndex;
-    final boolean canPerformBulkMerge;
-    int docID = -1;
-
-    CompressingTermVectorsSub(MergeState mergeState, boolean canPerformBulkMerge, int readerIndex) {
-      super(mergeState.docMaps[readerIndex]);
-      this.maxDoc = mergeState.maxDocs[readerIndex];
-      this.readerIndex = readerIndex;
-      this.canPerformBulkMerge = canPerformBulkMerge;
-    }
-
-    @Override
-    public int nextDoc() {
-      docID++;
-      if (docID == maxDoc) {
-        return NO_MORE_DOCS;
-      } else {
-        return docID;
-      }
-    }
-  }
-
   @Override
   public long ramBytesUsed() {
     return positionsBuf.length
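
The writer-side hunks above also drop the dirty-chunk flag that the forward patch packed into the chunk header: instead of writing chunkDocs directly, the patched writer wrote (chunkDocs << 1) | dirtyBit so that a later bulk merge could tell whether a copied chunk had been force-flushed, and the reader recovered the count with an unsigned shift. A small sketch of that encoding, with hypothetical helper names used purely for illustration:

    // Hypothetical sketch of the chunk-header encoding removed above:
    // the doc count and a "dirty" (forced-flush) flag packed into one vint.
    static int encodeChunkHeader(int chunkDocs, boolean forcedFlush) {
      return (chunkDocs << 1) | (forcedFlush ? 1 : 0);
    }

    static int decodeChunkDocs(int code) {
      return code >>> 1;       // upper bits hold the doc count
    }

    static boolean decodeDirtyFlag(int code) {
      return (code & 1) != 0;  // low bit marks an incomplete (dirty) chunk
    }
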
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java
index 12090ce..0d0604e 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java
@@ -25,14 +25,12 @@ import static org.apache.lucene.index.PostingsEnum.POSITIONS;
 
 import com.carrotsearch.randomizedtesting.generators.RandomPicks;
 import java.io.IOException;
-import java.io.UncheckedIOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicReference;
@@ -51,20 +49,16 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.FieldType;
-import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.TermsEnum.SeekStatus;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.AttributeReflector;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.TestUtil;
 
 /**
@@ -673,94 +667,43 @@ public abstract class BaseTermVectorsFormatTestCase extends BaseIndexFileFormatT
     dir.close();
   }
 
-  private void doTestMerge(Sort indexSort, boolean allowDeletes) throws IOException {
+  public void testMerge() throws IOException {
     final RandomDocumentFactory docFactory = new RandomDocumentFactory(5, 20);
     final int numDocs = atLeast(100);
+    final int numDeletes = random().nextInt(numDocs);
+    final Set<Integer> deletes = new HashSet<>();
+    while (deletes.size() < numDeletes) {
+      deletes.add(random().nextInt(numDocs));
+    }
     for (Options options : validOptions()) {
-      Map<String, RandomDocument> docs = new HashMap<>();
+      final RandomDocument[] docs = new RandomDocument[numDocs];
       for (int i = 0; i < numDocs; ++i) {
-        docs.put(
-            Integer.toString(i),
-            docFactory.newDocument(TestUtil.nextInt(random(), 1, 3), atLeast(10), options));
+        docs[i] = docFactory.newDocument(TestUtil.nextInt(random(), 1, 3), atLeast(10), options);
       }
       final Directory dir = newDirectory();
-      final IndexWriterConfig iwc = newIndexWriterConfig();
-      if (indexSort != null) {
-        iwc.setIndexSort(indexSort);
-      }
-      final RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-      List<String> liveDocIDs = new ArrayList<>();
-      List<String> ids = new ArrayList<>(docs.keySet());
-      Collections.shuffle(ids, random());
-      Runnable verifyTermVectors =
-          () -> {
-            try (DirectoryReader reader = maybeWrapWithMergingReader(writer.getReader())) {
-              for (String id : liveDocIDs) {
-                final int docID = docID(reader, id);
-                assertEquals(docs.get(id), reader.getTermVectors(docID));
-              }
-            } catch (IOException e) {
-              throw new UncheckedIOException(e);
-            }
-          };
-      for (String id : ids) {
-        final Document doc = addId(docs.get(id).toDocument(), id);
-        if (indexSort != null) {
-          for (SortField sortField : indexSort.getSort()) {
-            doc.add(
-                new NumericDocValuesField(
-                    sortField.getField(), TestUtil.nextInt(random(), 0, 1024)));
-          }
-        }
-        if (random().nextInt(100) < 5) {
-          // add via foreign writer
-          IndexWriterConfig otherIwc = newIndexWriterConfig();
-          if (indexSort != null) {
-            otherIwc.setIndexSort(indexSort);
-          }
-          try (Directory otherDir = newDirectory();
-              RandomIndexWriter otherIw = new RandomIndexWriter(random(), otherDir, otherIwc)) {
-            otherIw.addDocument(doc);
-            try (DirectoryReader otherReader = otherIw.getReader()) {
-              TestUtil.addIndexesSlowly(writer.w, otherReader);
-            }
-          }
-        } else {
-          writer.addDocument(doc);
-        }
-        liveDocIDs.add(id);
-        if (allowDeletes && random().nextInt(100) < 20) {
-          final String deleteId = liveDocIDs.remove(random().nextInt(liveDocIDs.size()));
-          writer.deleteDocuments(new Term("id", deleteId));
-        }
+      final RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+      for (int i = 0; i < numDocs; ++i) {
+        writer.addDocument(addId(docs[i].toDocument(), "" + i));
         if (rarely()) {
           writer.commit();
-          verifyTermVectors.run();
-        }
-        if (rarely()) {
-          writer.forceMerge(1);
-          verifyTermVectors.run();
         }
       }
-      verifyTermVectors.run();
+      for (int delete : deletes) {
+        writer.deleteDocuments(new Term("id", "" + delete));
+      }
+      // merge with deletes
       writer.forceMerge(1);
-      verifyTermVectors.run();
-      IOUtils.close(writer, dir);
-    }
-  }
-
-  public void testMergeWithIndexSort() throws IOException {
-    SortField[] sortFields = new SortField[TestUtil.nextInt(random(), 1, 2)];
-    for (int i = 0; i < sortFields.length; i++) {
-      sortFields[i] = new SortField("sort_field_" + i, SortField.Type.LONG);
+      final IndexReader reader = writer.getReader();
+      for (int i = 0; i < numDocs; ++i) {
+        if (!deletes.contains(i)) {
+          final int docID = docID(reader, "" + i);
+          assertEquals(docs[i], reader.getTermVectors(docID));
+        }
+      }
+      reader.close();
+      writer.close();
+      dir.close();
     }
-    doTestMerge(new Sort(sortFields), false);
-    doTestMerge(new Sort(sortFields), true);
-  }
-
-  public void testMergeWithoutIndexSort() throws IOException {
-    doTestMerge(null, false);
-    doTestMerge(null, true);
   }
 
   // run random tests from different threads to make sure the per-thread clones