You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dn...@apache.org on 2018/08/24 13:04:24 UTC

lucene-solr:branch_7x: LUCENE-8458: Ensure init PendingSoftDeletes when carry-over deletes

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_7x bdaa9e8f5 -> da37ffb51


LUCENE-8458: Ensure init PendingSoftDeletes when carry-over deletes

Today when carrying over hard-deletes after merging segments, we might
not adjust soft-deletes count accordingly because we do not always
ensure that the PendingSoftDeletes of the new segment is initialized.

This change fixes the initialization condition in PendingSoftDeletes and
makes sure it is initialized before accepting deletes.

Co-authored-by: Simon Willnauer <si...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/da37ffb5
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/da37ffb5
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/da37ffb5

Branch: refs/heads/branch_7x
Commit: da37ffb510540af930a79eb1535258b5047a4eba
Parents: bdaa9e8
Author: Nhat Nguyen <nh...@elastic.co>
Authored: Fri Aug 24 08:33:18 2018 -0400
Committer: Nhat Nguyen <nh...@elastic.co>
Committed: Fri Aug 24 08:49:14 2018 -0400

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  3 +
 .../org/apache/lucene/index/PendingDeletes.java | 13 ++-
 .../apache/lucene/index/PendingSoftDeletes.java |  7 +-
 .../apache/lucene/index/ReadersAndUpdates.java  |  3 +
 .../lucene/index/TestPendingSoftDeletes.java    |  8 +-
 .../TestSoftDeletesRetentionMergePolicy.java    | 94 +++++++++++++++++---
 6 files changed, 111 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/da37ffb5/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 7b09849..a0415dd 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -79,6 +79,9 @@ Bug Fixes:
 * LUCENE-8441: IndexWriter now checks doc value type for index sort fields
   and fails the document if they are not compatible. (Jim Ferenczi, Mike McCandless)
 
+* LUCENE-8458: Adjust initialization condition of PendingSoftDeletes and ensures
+  it is initialized before accepting deletes (Simon Willnauer, Nhat Nguyen)
+
 Changes in Runtime Behavior:
 
 * LUCENE-7976: TieredMergePolicy now respects maxSegmentSizeMB by default when executing

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/da37ffb5/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java b/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java
index 4ab037c..157ae41 100644
--- a/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java
+++ b/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java
@@ -39,7 +39,7 @@ class PendingDeletes {
   // case getMutableBits needs to be called
   private FixedBitSet writeableLiveDocs;
   protected int pendingDeleteCount;
-  private boolean liveDocsInitialized;
+  boolean liveDocsInitialized;
 
   PendingDeletes(SegmentReader reader, SegmentCommitInfo info) {
     this(info, reader.getLiveDocs(), true);
@@ -53,7 +53,7 @@ class PendingDeletes {
     // For segments that were published we enforce a reader in the BufferedUpdatesStream.SegmentState ctor
   }
 
-  private PendingDeletes(SegmentCommitInfo info, Bits liveDocs, boolean liveDocsInitialized) {
+  PendingDeletes(SegmentCommitInfo info, Bits liveDocs, boolean liveDocsInitialized) {
     this.info = info;
     this.liveDocs = liveDocs;
     pendingDeleteCount = 0;
@@ -279,4 +279,13 @@ class PendingDeletes {
         " info.getDelCount()=" + info.getDelCount();
     return true;
   }
+
+  /**
+   * Returns {@code true} if we have to initialize this PendingDeletes before {@link #delete(int)};
+   * otherwise this PendingDeletes is ready to accept deletes. A PendingDeletes can be initialized
+   * by providing it a reader via {@link #onNewReader(CodecReader, SegmentCommitInfo)}.
+   */
+  boolean mustInitOnDelete() {
+    return false;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/da37ffb5/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java b/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java
index eb6e4ff..926295f 100644
--- a/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java
+++ b/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java
@@ -37,7 +37,7 @@ final class PendingSoftDeletes extends PendingDeletes {
   private final PendingDeletes hardDeletes;
 
   PendingSoftDeletes(String field, SegmentCommitInfo info)  {
-    super(info);
+    super(info, null, info.getDelCount(true) == 0);
     this.field = field;
     hardDeletes = new PendingDeletes(info);
   }
@@ -230,6 +230,11 @@ final class PendingSoftDeletes extends PendingDeletes {
     return hardDeletes.getLiveDocs();
   }
 
+  @Override
+  boolean mustInitOnDelete() {
+    return liveDocsInitialized == false;
+  }
+
   static int countSoftDeletes(DocIdSetIterator softDeletedDocs, Bits hardDeletes) throws IOException {
     int count = 0;
     if (softDeletedDocs != null) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/da37ffb5/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
index b09338f..9afff9c 100644
--- a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
+++ b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
@@ -184,6 +184,9 @@ final class ReadersAndUpdates {
   }
 
   public synchronized boolean delete(int docID) throws IOException {
+    if (reader == null && pendingDeletes.mustInitOnDelete()) {
+      getReader(IOContext.READ).decRef(); // pass a reader to initialize the pending deletes
+    }
     return pendingDeletes.delete(docID);
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/da37ffb5/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java b/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java
index 70f43a0..7d03c7e 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java
@@ -65,7 +65,7 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
     writer.softUpdateDocument(new Term("id", "2"), doc,
         new NumericDocValuesField("_soft_deletes", 1));
     writer.commit();
-    DirectoryReader reader = writer.getReader();
+    DirectoryReader reader = DirectoryReader.open(dir);
     assertEquals(1, reader.leaves().size());
     SegmentReader segmentReader = (SegmentReader) reader.leaves().get(0).reader();
     SegmentCommitInfo segmentInfo = segmentReader.getSegmentInfo();
@@ -105,7 +105,7 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
     writer.softUpdateDocument(new Term("id", "2"), doc,
         new NumericDocValuesField("_soft_deletes", 1));
     writer.commit();
-    DirectoryReader reader = writer.getReader();
+    DirectoryReader reader = DirectoryReader.open(dir);
     assertEquals(1, reader.leaves().size());
     SegmentReader segmentReader = (SegmentReader) reader.leaves().get(0).reader();
     SegmentCommitInfo segmentInfo = segmentReader.getSegmentInfo();
@@ -222,7 +222,7 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
     writer.softUpdateDocument(new Term("id", "2"), doc,
         new NumericDocValuesField("_soft_deletes", 1));
     writer.commit();
-    DirectoryReader reader = writer.getReader();
+    DirectoryReader reader = DirectoryReader.open(dir);
     assertEquals(1, reader.leaves().size());
     SegmentReader segmentReader = (SegmentReader) reader.leaves().get(0).reader();
     SegmentCommitInfo segmentInfo = segmentReader.getSegmentInfo();
@@ -270,7 +270,7 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
     writer.softUpdateDocument(new Term("id", "2"), doc,
         new NumericDocValuesField("_soft_deletes", 1));
     writer.commit();
-    DirectoryReader reader = writer.getReader();
+    DirectoryReader reader = DirectoryReader.open(dir);
     assertEquals(1, reader.leaves().size());
     SegmentReader segmentReader = (SegmentReader) reader.leaves().get(0).reader();
     SegmentCommitInfo segmentInfo = segmentReader.getSegmentInfo();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/da37ffb5/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesRetentionMergePolicy.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesRetentionMergePolicy.java b/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesRetentionMergePolicy.java
index f6c1f6c..dc1c116 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesRetentionMergePolicy.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesRetentionMergePolicy.java
@@ -29,6 +29,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.function.Supplier;
 
+import com.carrotsearch.randomizedtesting.generators.RandomPicks;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.IntPoint;
@@ -594,7 +595,7 @@ public class TestSoftDeletesRetentionMergePolicy extends LuceneTestCase {
     }
     while (true) {
       try (DirectoryReader reader = writer.getReader()) {
-        TopDocs topDocs = new IndexSearcher(new NoDeletesWrapper(reader)).search(new TermQuery(new Term("id", "1")), 1);
+        TopDocs topDocs = new IndexSearcher(new IncludeSoftDeletesWrapper(reader)).search(new TermQuery(new Term("id", "1")), 1);
         assertEquals(1, topDocs.totalHits);
         if (writer.tryDeleteDocument(reader, topDocs.scoreDocs[0].doc) > 0) {
           break;
@@ -630,11 +631,58 @@ public class TestSoftDeletesRetentionMergePolicy extends LuceneTestCase {
     IOUtils.close(sm, writer, dir);
   }
 
+  public void testMixedSoftDeletesAndHardDeletes() throws Exception {
+    Directory dir = newDirectory();
+    String softDeletesField = "soft-deletes";
+    IndexWriterConfig config = newIndexWriterConfig()
+        .setMaxBufferedDocs(2 + random().nextInt(50)).setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)
+        .setSoftDeletesField(softDeletesField)
+        .setMergePolicy(new SoftDeletesRetentionMergePolicy(softDeletesField, MatchAllDocsQuery::new, newMergePolicy()));
+    IndexWriter writer = new IndexWriter(dir, config);
+    int numDocs = 10 + random().nextInt(100);
+    Set<String> liveDocs = new HashSet<>();
+    for (int i = 0; i < numDocs; i++) {
+      String id = Integer.toString(i);
+      Document doc = new Document();
+      doc.add(new StringField("id", id, Field.Store.YES));
+      writer.addDocument(doc);
+      liveDocs.add(id);
+    }
+    for (int i = 0; i < numDocs; i++) {
+      if (random().nextBoolean()) {
+        String id = Integer.toString(i);
+        if (random().nextBoolean() && liveDocs.contains(id)) {
+          doUpdate(new Term("id", id), writer, new NumericDocValuesField(softDeletesField, 1));
+        } else {
+          Document doc = new Document();
+          doc.add(new StringField("id", "v" + id, Field.Store.YES));
+          writer.softUpdateDocument(new Term("id", id), doc, new NumericDocValuesField(softDeletesField, 1));
+          liveDocs.add("v" + id);
+        }
+      }
+      if (random().nextBoolean() && liveDocs.isEmpty() == false) {
+        String delId = RandomPicks.randomFrom(random(), liveDocs);
+        if (random().nextBoolean()) {
+          doDelete(new Term("id", delId), writer);
+        } else {
+          writer.deleteDocuments(new Term("id", delId));
+        }
+        liveDocs.remove(delId);
+      }
+    }
+    try (DirectoryReader unwrapped = writer.getReader()) {
+      DirectoryReader reader = new IncludeSoftDeletesWrapper(unwrapped);
+      assertEquals(liveDocs.size(), reader.numDocs());
+    }
+    writer.commit();
+    IOUtils.close(writer, dir);
+  }
+
   static void doUpdate(Term doc, IndexWriter writer, Field... fields) throws IOException {
     long seqId = -1;
     do { // retry if we just committing a merge
       try (DirectoryReader reader = writer.getReader()) {
-        TopDocs topDocs = new IndexSearcher(new NoDeletesWrapper(reader)).search(new TermQuery(doc), 10);
+        TopDocs topDocs = new IndexSearcher(new IncludeSoftDeletesWrapper(reader)).search(new TermQuery(doc), 10);
         assertEquals(1, topDocs.totalHits);
         int theDoc = topDocs.scoreDocs[0].doc;
         seqId = writer.tryUpdateDocValue(reader, theDoc, fields);
@@ -642,20 +690,46 @@ public class TestSoftDeletesRetentionMergePolicy extends LuceneTestCase {
     } while (seqId == -1);
   }
 
-  private static final class NoDeletesSubReaderWrapper extends FilterDirectoryReader.SubReaderWrapper {
+  static void doDelete(Term doc, IndexWriter writer) throws IOException {
+    long seqId;
+    do { // retry if we just committing a merge
+      try (DirectoryReader reader = writer.getReader()) {
+        TopDocs topDocs = new IndexSearcher(new IncludeSoftDeletesWrapper(reader)).search(new TermQuery(doc), 10);
+        assertEquals(1, topDocs.totalHits);
+        int theDoc = topDocs.scoreDocs[0].doc;
+        seqId = writer.tryDeleteDocument(reader, theDoc);
+      }
+    } while (seqId == -1);
+  }
 
+  private static final class IncludeSoftDeletesSubReaderWrapper extends FilterDirectoryReader.SubReaderWrapper {
     @Override
     public LeafReader wrap(LeafReader reader) {
+      while (reader instanceof FilterLeafReader) {
+        reader = ((FilterLeafReader) reader).getDelegate();
+      }
+      Bits hardLiveDocs = ((SegmentReader) reader).getHardLiveDocs();
+      final int numDocs;
+      if (hardLiveDocs == null) {
+        numDocs = reader.maxDoc();
+      } else {
+        int bits = 0;
+        for (int i = 0; i < hardLiveDocs.length(); i++) {
+          if (hardLiveDocs.get(i)) {
+            bits++;
+          }
+        }
+        numDocs = bits;
+      }
       return new FilterLeafReader(reader) {
-
         @Override
         public int numDocs() {
-          return maxDoc();
+          return numDocs;
         }
 
         @Override
         public Bits getLiveDocs() {
-          return null;
+          return hardLiveDocs;
         }
 
         @Override
@@ -671,15 +745,15 @@ public class TestSoftDeletesRetentionMergePolicy extends LuceneTestCase {
     }
   }
 
-  private static final class NoDeletesWrapper extends FilterDirectoryReader {
+  private static final class IncludeSoftDeletesWrapper extends FilterDirectoryReader {
 
-    NoDeletesWrapper(DirectoryReader in) throws IOException {
-      super(in, new NoDeletesSubReaderWrapper());
+    IncludeSoftDeletesWrapper(DirectoryReader in) throws IOException {
+      super(in, new IncludeSoftDeletesSubReaderWrapper());
     }
 
     @Override
     protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
-      return new NoDeletesWrapper(in);
+      return new IncludeSoftDeletesWrapper(in);
     }