Posted to commits@lucene.apache.org by si...@apache.org on 2018/05/03 11:38:44 UTC

lucene-solr:master: LUCENE-8290: Keep soft deletes in sync with on-disk DocValues

Repository: lucene-solr
Updated Branches:
  refs/heads/master 8b9c2a318 -> 591fc6627


LUCENE-8290: Keep soft deletes in sync with on-disk DocValues

Today we pass the doc values update on to PendingDeletes as soon as
it is applied. This might cause issues with a retention merge policy
that sees a deleted document but not its value on disk.
This change moves the PendingDeletes callback back to flush time in
order to be consistent with what is actually updated on disk.

This change also makes sure that doc values updates held in the
reader pool are written to disk on flush, and adds extra best-effort
checks to drop fully deleted segments on flush, commit and getReader.
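
For illustration only (not part of this commit), here is a minimal sketch of the
user-facing setup this touches, loosely following the retention merge policy test
below: soft deletes are doc values updates on a configured field, a
SoftDeletesRetentionMergePolicy decides from those on-disk values what a merge has
to keep, and flush, commit and NRT getReader are the points at which the on-disk
values, the pending soft deletes, and the drop of fully deleted segments are
reconciled. The field name, directory choice and printed values are illustrative
assumptions, not taken from the change itself.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class SoftDeletesLifecycleSketch {
  public static void main(String[] args) throws Exception {
    try (Directory dir = new RAMDirectory()) { // illustrative; any Directory works
      IndexWriterConfig config = new IndexWriterConfig()
          // the field whose doc values updates act as soft deletes (illustrative name)
          .setSoftDeletesField("soft_delete")
          // the retention query decides which soft-deleted docs a merge must keep; it
          // reads the soft-deletes doc values, so they must match the applied deletes
          .setMergePolicy(new SoftDeletesRetentionMergePolicy(
              "soft_delete", MatchAllDocsQuery::new, new TieredMergePolicy()));
      try (IndexWriter writer = new IndexWriter(dir, config)) {
        Document doc = new Document();
        doc.add(new StringField("id", "1", Field.Store.YES));
        writer.addDocument(doc);

        // soft-delete the previous copy of id=1 by writing a doc values update
        // instead of a hard delete
        writer.softUpdateDocument(new Term("id", "1"), doc,
            new NumericDocValuesField("soft_delete", 1));

        // flush, commit and NRT getReader are the places where pending doc values
        // updates are written, PendingSoftDeletes is kept in sync with them, and
        // fully deleted segments are dropped on a best-effort basis
        writer.flush();
        writer.commit();
        try (DirectoryReader reader = DirectoryReader.open(writer)) {
          // the soft-deleted copy is invisible to the reader but still on disk,
          // since the retention query above keeps every soft-deleted document
          System.out.println("numDocs=" + reader.numDocs() + " maxDoc=" + reader.maxDoc());
        }

        // would reclaim soft-deleted docs the retention query no longer keeps;
        // with MatchAllDocsQuery nothing is reclaimed here
        writer.forceMergeDeletes(true);
      }
    }
  }
}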


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/591fc662
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/591fc662
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/591fc662

Branch: refs/heads/master
Commit: 591fc6627acffdc75ce88feb8a912b3225b47f9d
Parents: 8b9c2a3
Author: Simon Willnauer <si...@apache.org>
Authored: Wed May 2 16:17:15 2018 +0200
Committer: Simon Willnauer <si...@apache.org>
Committed: Thu May 3 13:38:35 2018 +0200

----------------------------------------------------------------------
 .../org/apache/lucene/index/IndexWriter.java    | 51 ++++++++++++++------
 .../org/apache/lucene/index/PendingDeletes.java | 13 ++---
 .../apache/lucene/index/PendingSoftDeletes.java |  9 +---
 .../apache/lucene/index/ReadersAndUpdates.java  |  3 +-
 .../lucene/index/TestPendingSoftDeletes.java    |  9 ++--
 .../TestSoftDeletesRetentionMergePolicy.java    |  9 ++--
 6 files changed, 51 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/591fc662/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index b209146..fa631d8 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -510,16 +510,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
 
             // TODO: we could instead just clone SIS and pull/incref readers in sync'd block, and then do this w/o IW's lock?
             // Must do this sync'd on IW to prevent a merge from completing at the last second and failing to write its DV updates:
-            if (readerPool.writeAllDocValuesUpdates()) {
-              checkpoint();
-            }
-
-            if (writeAllDeletes) {
-              // Must move the deletes to disk:
-              if (readerPool.commit(segmentInfos)) {
-                checkpointNoSIS();
-              }
-            }
+           writeReaderPool(writeAllDeletes);
 
             // Prevent segmentInfos from changing while opening the
             // reader; in theory we could instead do similar retry logic,
@@ -3132,11 +3123,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
 
             applyAllDeletesAndUpdates();
             synchronized(this) {
-
-              if (readerPool.commit(segmentInfos)) {
-                checkpointNoSIS();
-              }
-
+              writeReaderPool(true);
               if (changeCount.get() != lastCommitChangeCount) {
                 // There are changes to commit, so we will write a new segments_N in startCommit.
                 // The act of committing is itself an NRT-visible change (an NRT reader that was
@@ -3217,6 +3204,39 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
       }
     }
   }
+
+  /**
+   * Ensures that all changes in the reader-pool are written to disk.
+   * @param writeDeletes if <code>true</code> deletes should be written to disk too.
+   */
+  private final void writeReaderPool(boolean writeDeletes) throws IOException {
+    assert Thread.holdsLock(this);
+    if (writeDeletes) {
+      if (readerPool.commit(segmentInfos)) {
+        checkpointNoSIS();
+      }
+    } else { // only write the docValues
+      if (readerPool.writeAllDocValuesUpdates()) {
+        checkpoint();
+      }
+    }
+    // now do some best effort to check if a segment is fully deleted
+    List<SegmentCommitInfo> toDrop = new ArrayList<>(); // don't modify segmentInfos in-place
+    for (SegmentCommitInfo info : segmentInfos) {
+      ReadersAndUpdates readersAndUpdates = readerPool.get(info, false);
+      if (readersAndUpdates != null) {
+        if (isFullyDeleted(readersAndUpdates)) {
+          toDrop.add(info);
+        }
+      }
+    }
+    for (SegmentCommitInfo info : toDrop) {
+      dropDeletedSegment(info);
+    }
+    if (toDrop.isEmpty() == false) {
+      checkpoint();
+    }
+  }
   
   /**
    * Sets the iterator to provide the commit user data map at commit time.  Calling this method
@@ -3503,6 +3523,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
       anyChanges |= maybeMerge.getAndSet(false);
       
       synchronized(this) {
+        writeReaderPool(applyAllDeletes);
         doAfterFlush();
         success = true;
         return anyChanges;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/591fc662/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java b/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java
index ae91be8..1878665 100644
--- a/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java
+++ b/lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java
@@ -235,18 +235,11 @@ class PendingDeletes {
   }
 
   /**
-   * Called before the given DocValuesFieldUpdates are written to disk
-   * @param info the field to apply
-   */
-  void onDocValuesUpdate(FieldInfo info) {
-  }
-
-  /**
-   * Called for every field update for the given field
-   * @param field the field that's updated
+   * Called for every field update for the given field at flush time
+   * @param info the field info of the field that's updated
    * @param iterator the values to apply
    */
-  void onDocValuesUpdate(String field, DocValuesFieldUpdates.Iterator iterator) throws IOException {
+  void onDocValuesUpdate(FieldInfo info, DocValuesFieldUpdates.Iterator iterator) throws IOException {
   }
 
   int numDeletesToMerge(MergePolicy policy, IOSupplier<CodecReader> readerIOSupplier) throws IOException {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/591fc662/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java b/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java
index a5e7b14..eae25e0 100644
--- a/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java
+++ b/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java
@@ -120,14 +120,9 @@ final class PendingSoftDeletes extends PendingDeletes {
   }
 
   @Override
-  void onDocValuesUpdate(String field, DocValuesFieldUpdates.Iterator iterator) throws IOException {
-    if (this.field.equals(field)) {
+  void onDocValuesUpdate(FieldInfo info, DocValuesFieldUpdates.Iterator iterator) throws IOException {
+    if (this.field.equals(info.name)) {
       pendingDeleteCount += applySoftDeletes(iterator, getMutableBits());
-    }
-  }
-  @Override
-  void onDocValuesUpdate(FieldInfo info) {
-    if (field.equals(info.name)) {
       assert dvGeneration < info.getDocValuesGen() : "we have seen this generation update already: " + dvGeneration + " vs. " + info.getDocValuesGen();
       assert dvGeneration != -2 : "docValues generation is still uninitialized";
       dvGeneration = info.getDocValuesGen();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/591fc662/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
index b31bc49..6e074ee 100644
--- a/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
+++ b/lucene/core/src/java/org/apache/lucene/index/ReadersAndUpdates.java
@@ -161,7 +161,6 @@ final class ReadersAndUpdates {
       }
       fieldUpdates.add(update);
     }
-    pendingDeletes.onDocValuesUpdate(update.field, update.iterator());
   }
 
   public synchronized long getNumDVUpdates() {
@@ -334,7 +333,6 @@ final class ReadersAndUpdates {
       final TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(dir);
       final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info, fieldInfos, null, updatesContext, segmentSuffix);
       try (final DocValuesConsumer fieldsConsumer = dvFormat.fieldsConsumer(state)) {
-        pendingDeletes.onDocValuesUpdate(fieldInfo);
         Function<FieldInfo, DocValuesFieldUpdates.Iterator> updateSupplier = (info) -> {
           if (info != fieldInfo) {
             throw new IllegalArgumentException("expected field info for field: " + fieldInfo.name + " but got: " + info.name);
@@ -345,6 +343,7 @@ final class ReadersAndUpdates {
           }
           return  DocValuesFieldUpdates.mergedIterator(subs);
         };
+        pendingDeletes.onDocValuesUpdate(fieldInfo, updateSupplier.apply(fieldInfo));
         if (type == DocValuesType.BINARY) {
           fieldsConsumer.addBinaryField(fieldInfo, new EmptyDocValuesProducer() {
             @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/591fc662/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java b/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java
index 8bd7ca6..7ddf0be 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java
@@ -116,9 +116,8 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
     List<Integer> docsDeleted = Arrays.asList(1, 3, 7, 8, DocIdSetIterator.NO_MORE_DOCS);
     List<DocValuesFieldUpdates> updates = Arrays.asList(singleUpdate(docsDeleted, 10));
     for (DocValuesFieldUpdates update : updates) {
-      deletes.onDocValuesUpdate(update.field, update.iterator());
+      deletes.onDocValuesUpdate(fieldInfo, update.iterator());
     }
-    deletes.onDocValuesUpdate(fieldInfo);
     assertEquals(4, deletes.numPendingDeletes());
     assertTrue(deletes.getLiveDocs().get(0));
     assertFalse(deletes.getLiveDocs().get(1));
@@ -135,9 +134,8 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
     updates = Arrays.asList(singleUpdate(docsDeleted, 10));
     fieldInfo = new FieldInfo("_soft_deletes", 1, false, false, false, IndexOptions.NONE, DocValuesType.NUMERIC, 1, Collections.emptyMap(), 0, 0);
     for (DocValuesFieldUpdates update : updates) {
-      deletes.onDocValuesUpdate(update.field, update.iterator());
+      deletes.onDocValuesUpdate(fieldInfo, update.iterator());
     }
-    deletes.onDocValuesUpdate(fieldInfo);
     assertEquals(5, deletes.numPendingDeletes());
     assertTrue(deletes.getLiveDocs().get(0));
     assertFalse(deletes.getLiveDocs().get(1));
@@ -180,9 +178,8 @@ public class TestPendingSoftDeletes extends TestPendingDeletes {
     List<Integer> docsDeleted = Arrays.asList(1, DocIdSetIterator.NO_MORE_DOCS);
     List<DocValuesFieldUpdates> updates = Arrays.asList(singleUpdate(docsDeleted, 3));
     for (DocValuesFieldUpdates update : updates) {
-      deletes.onDocValuesUpdate(update.field, update.iterator());
+      deletes.onDocValuesUpdate(fieldInfo, update.iterator());
     }
-    deletes.onDocValuesUpdate(fieldInfo);
     assertEquals(1, deletes.numPendingDeletes());
     assertTrue(deletes.getLiveDocs().get(0));
     assertFalse(deletes.getLiveDocs().get(1));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/591fc662/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesRetentionMergePolicy.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesRetentionMergePolicy.java b/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesRetentionMergePolicy.java
index b868a2e..5f1ba6c 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesRetentionMergePolicy.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesRetentionMergePolicy.java
@@ -289,6 +289,9 @@ public class TestSoftDeletesRetentionMergePolicy extends LuceneTestCase {
               writer.softUpdateDocument(new Term("id", id), doc,
                   new NumericDocValuesField("soft_delete", 1));
             }
+            if (rarely()) {
+              writer.flush();
+            }
             ids.add(id);
           }
         } catch (IOException | InterruptedException e) {
@@ -382,13 +385,13 @@ public class TestSoftDeletesRetentionMergePolicy extends LuceneTestCase {
     Document tombstone = new Document();
     tombstone.add(new NumericDocValuesField("soft_delete", 1));
     writer.softUpdateDocument(new Term("id", "1"), tombstone, new NumericDocValuesField("soft_delete", 1));
-    // Internally, forceMergeDeletes will call flush to flush pending updates
+    writer.forceMergeDeletes(true); // Internally, forceMergeDeletes will call flush to flush pending updates
     // Thus, we will have two segments - both having soft-deleted documents.
     // We expect any MP to merge these segments into one segment
     // when calling forceMergeDeletes.
-    writer.forceMergeDeletes(true);
-    assertEquals(1, writer.maxDoc());
     assertEquals(1, writer.segmentInfos.asList().size());
+    assertEquals(1, writer.numDocs());
+    assertEquals(1, writer.maxDoc());
     writer.close();
     dir.close();
   }