You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2018/11/21 13:44:38 UTC

lucene-solr:master: LUCENE-8569: Never count soft-deletes if reader has no hard-deletes

Repository: lucene-solr
Updated Branches:
  refs/heads/master 5f8855ee0 -> 56cb42d20


LUCENE-8569: Never count soft-deletes if reader has no hard-deletes

Today we count the actual soft-deletes during a merge, which is
unnecessary if there are no hard-deletes present. In this case, which
is considered to be the common case, we can get accurate counts by subtracting
the number of deleted docs in the wrapped reader from the number of soft-deletes
in that reader.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/56cb42d2
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/56cb42d2
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/56cb42d2

Branch: refs/heads/master
Commit: 56cb42d200dd999342c4cd5c92fbfa271ba91153
Parents: 5f8855e
Author: Simon Willnauer <si...@apache.org>
Authored: Tue Nov 20 12:56:20 2018 +0100
Committer: Simon Willnauer <si...@apache.org>
Committed: Wed Nov 21 14:29:00 2018 +0100

----------------------------------------------------------------------
 .../org/apache/lucene/index/IndexWriter.java    | 84 +++++++++++++-------
 1 file changed, 56 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/56cb42d2/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index 028554b..f841582 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -69,6 +69,7 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.Constants;
+import org.apache.lucene.util.Counter;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.InfoStream;
 import org.apache.lucene.util.StringHelper;
@@ -4352,6 +4353,36 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable,
     }
   }
 
+  private void countSoftDeletes(CodecReader reader, Bits wrappedLiveDocs, Bits hardLiveDocs, Counter softDeleteCounter,
+                                Counter hardDeleteCounter) throws IOException {
+    int hardDeleteCount = 0;
+    int softDeletesCount = 0;
+    DocIdSetIterator softDeletedDocs = DocValuesFieldExistsQuery.getDocValuesDocIdSetIterator(config.getSoftDeletesField(), reader);
+    if (softDeletedDocs != null) {
+      int docId;
+      while ((docId = softDeletedDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+        if (wrappedLiveDocs == null || wrappedLiveDocs.get(docId)) {
+          if (hardLiveDocs == null || hardLiveDocs.get(docId)) {
+            softDeletesCount++;
+          } else {
+            hardDeleteCount++;
+          }
+        }
+      }
+    }
+    softDeleteCounter.addAndGet(softDeletesCount);
+    hardDeleteCounter.addAndGet(hardDeleteCount);
+  }
+
+  private boolean assertSoftDeletesCount(CodecReader reader, int expectedCount) throws IOException {
+    Counter count = Counter.newCounter(false);
+    Counter hardDeletes = Counter.newCounter(false);
+    countSoftDeletes(reader, reader.getLiveDocs(), null, count, hardDeletes);
+    assert count.get() == expectedCount : "soft-deletes count mismatch expected: "
+        + expectedCount  + " but actual: " + count.get() ;
+    return true;
+  }
+
   /** Does the actual (time-consuming) work of the merge,
    *  but without holding synchronized lock on IndexWriter
    *  instance */
@@ -4400,7 +4431,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable,
 
       // Let the merge wrap readers
       List<CodecReader> mergeReaders = new ArrayList<>();
-      int softDeleteCount = 0;
+      Counter softDeleteCount = Counter.newCounter(false);
       for (int r = 0; r < merge.readers.size(); r++) {
         SegmentReader reader = merge.readers.get(r);
         CodecReader wrappedReader = merge.wrapForMerge(reader);
@@ -4408,34 +4439,31 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable,
         if (softDeletesEnabled) {
           if (reader != wrappedReader) { // if we don't have a wrapped reader we won't preserve any soft-deletes
             Bits hardLiveDocs = merge.hardLiveDocs.get(r);
-            Bits wrappedLiveDocs = wrappedReader.getLiveDocs();
-            int hardDeleteCount = 0;
-            DocIdSetIterator softDeletedDocs = DocValuesFieldExistsQuery.getDocValuesDocIdSetIterator(config.getSoftDeletesField(), wrappedReader);
-            if (softDeletedDocs != null) {
-              int docId;
-              while ((docId = softDeletedDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-                if (wrappedLiveDocs == null || wrappedLiveDocs.get(docId)) {
-                  if (hardLiveDocs == null || hardLiveDocs.get(docId)) {
-                    softDeleteCount++;
-                  } else {
-                    hardDeleteCount++;
+            if (hardLiveDocs != null) { // we only need to do this accounting if we have mixed deletes
+              Bits wrappedLiveDocs = wrappedReader.getLiveDocs();
+              Counter hardDeleteCounter = Counter.newCounter(false);
+              countSoftDeletes(wrappedReader, wrappedLiveDocs, hardLiveDocs, softDeleteCount, hardDeleteCounter);
+              int hardDeleteCount = Math.toIntExact(hardDeleteCounter.get());
+              // Wrap the wrapped reader again if we have excluded some hard-deleted docs
+              if (hardDeleteCount > 0) {
+                Bits liveDocs = wrappedLiveDocs == null ? hardLiveDocs : new Bits() {
+                  @Override
+                  public boolean get(int index) {
+                    return hardLiveDocs.get(index) && wrappedLiveDocs.get(index);
                   }
-                }
+
+                  @Override
+                  public int length() {
+                    return hardLiveDocs.length();
+                  }
+                };
+                wrappedReader = FilterCodecReader.wrapLiveDocs(wrappedReader, liveDocs, wrappedReader.numDocs() - hardDeleteCount);
               }
-            }
-            // Wrap the wrapped reader again if we have excluded some hard-deleted docs
-            if (hardLiveDocs != null && hardDeleteCount > 0) {
-              Bits liveDocs = wrappedLiveDocs == null ? hardLiveDocs : new Bits() {
-                @Override
-                public boolean get(int index) {
-                  return hardLiveDocs.get(index) && wrappedLiveDocs.get(index);
-                }
-                @Override
-                public int length() {
-                  return hardLiveDocs.length();
-                }
-              };
-              wrappedReader = FilterCodecReader.wrapLiveDocs(wrappedReader, liveDocs, wrappedReader.numDocs() - hardDeleteCount);
+            } else {
+              final int carryOverSoftDeletes = reader.getSegmentInfo().getSoftDelCount() - wrappedReader.numDeletedDocs();
+              assert carryOverSoftDeletes >= 0 : "carry-over soft-deletes must be positive";
+              assert assertSoftDeletesCount(wrappedReader, carryOverSoftDeletes);
+              softDeleteCount.addAndGet(carryOverSoftDeletes);
             }
           }
         }
@@ -4445,7 +4473,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable,
                                                      merge.info.info, infoStream, dirWrapper,
                                                      globalFieldNumberMap, 
                                                      context);
-      merge.info.setSoftDelCount(softDeleteCount);
+      merge.info.setSoftDelCount(Math.toIntExact(softDeleteCount.get()));
       merge.checkAborted();
 
       merge.mergeStartNS = System.nanoTime();