You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2020/09/15 08:15:43 UTC

[lucene-solr] branch master updated: LUCENE-9516: Remove DocConsumer and IndexingChain from Lucene (#1867)

This is an automated email from the ASF dual-hosted git repository.

simonw pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new f655d97  LUCENE-9516: Remove DocConsumer and IndexingChain from Lucene (#1867)
f655d97 is described below

commit f655d97b54f426d9b590e7f853e8de43e50e5164
Author: Simon Willnauer <si...@apache.org>
AuthorDate: Tue Sep 15 10:15:24 2020 +0200

    LUCENE-9516: Remove DocConsumer and IndexingChain from Lucene (#1867)
    
    This removes the ability to replace the IndexingChain / DocConsumer
    in Lucenes IndexWriter. The interface is not sufficient to efficiently
    replace the functionality with reasonable efforts. It also seems it's completely
    unused at this point and hasn't been maintained in years.
---
 lucene/CHANGES.txt                                 |  4 +++
 .../java/org/apache/lucene/index/DocConsumer.java  | 36 --------------------
 .../lucene/index/DocumentsWriterPerThread.java     | 38 +++++-----------------
 ...efaultIndexingChain.java => IndexingChain.java} | 17 ++++------
 .../apache/lucene/index/LiveIndexWriterConfig.java | 13 --------
 .../org/apache/lucene/index/TestIndexWriter.java   |  2 +-
 .../apache/lucene/index/TestIndexWriterConfig.java | 17 ----------
 .../lucene/index/TestIndexWriterWithThreads.java   |  2 +-
 8 files changed, 20 insertions(+), 109 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 62306fe..9282d5a 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -61,6 +61,10 @@ API Changes
 * LUCENE-9462: Fields without positions should still return MatchIterator.
   (Alan Woodward, Dawid Weiss)
 
+* LUCENE-9516: Removed the ability to replace the IndexingChain / DocConsumer
+  in Lucenes IndexWriter. The interface is not sufficient to efficiently
+  replace the functionality with reasonable efforts. (Simon Willnauer)
+
 Improvements
 
 * LUCENE-9463: Query match region retrieval component, passage scoring and formatting
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocConsumer.java b/lucene/core/src/java/org/apache/lucene/index/DocConsumer.java
deleted file mode 100644
index eff4433..0000000
--- a/lucene/core/src/java/org/apache/lucene/index/DocConsumer.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.index;
-
-
-import java.io.IOException;
-
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.util.Accountable;
-
-abstract class DocConsumer implements Accountable {
-  abstract void processDocument(int docId, Iterable<? extends IndexableField> document) throws IOException;
-  abstract Sorter.DocMap flush(final SegmentWriteState state) throws IOException;
-  abstract void abort() throws IOException;
-
-  /**
-   * Returns a {@link DocIdSetIterator} for the given field or null if the field doesn't have
-   * doc values.
-   */
-  abstract DocIdSetIterator getHasDocValues(String field);
-
-}
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
index c94e856..d772a9d 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
@@ -27,7 +27,6 @@ import java.util.Objects;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.locks.ReentrantLock;
-import java.util.function.Consumer;
 
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice;
@@ -47,17 +46,6 @@ import org.apache.lucene.util.Version;
 
 final class DocumentsWriterPerThread implements Accountable {
 
-  /**
-   * The IndexingChain must define the {@link #getChain(int, SegmentInfo, Directory, FieldInfos.Builder, LiveIndexWriterConfig, Consumer)} method
-   * which returns the DocConsumer that the DocumentsWriter calls to process the
-   * documents.
-   */
-  abstract static class IndexingChain {
-    abstract DocConsumer getChain(int indexCreatedVersionMajor, SegmentInfo segmentInfo, Directory directory,
-                                  FieldInfos.Builder fieldInfos, LiveIndexWriterConfig indexWriterConfig,
-                                  Consumer<Throwable> abortingExceptionConsumer);
-  }
-
   private Throwable abortingException;
 
   private void onAbortingException(Throwable throwable) {
@@ -70,16 +58,6 @@ final class DocumentsWriterPerThread implements Accountable {
     return aborted;
   }
 
-  static final IndexingChain defaultIndexingChain = new IndexingChain() {
-
-    @Override
-    DocConsumer getChain(int indexCreatedVersionMajor, SegmentInfo segmentInfo, Directory directory,
-                         FieldInfos.Builder fieldInfos, LiveIndexWriterConfig indexWriterConfig,
-                         Consumer<Throwable> abortingExceptionConsumer) {
-      return new DefaultIndexingChain(indexCreatedVersionMajor, segmentInfo, directory, fieldInfos, indexWriterConfig, abortingExceptionConsumer);
-    }
-  };
-
   static final class FlushedSegment {
     final SegmentCommitInfo segmentInfo;
     final FieldInfos fieldInfos;
@@ -111,7 +89,7 @@ final class DocumentsWriterPerThread implements Accountable {
         infoStream.message("DWPT", "now abort");
       }
       try {
-        consumer.abort();
+        indexingChain.abort();
       } finally {
         pendingUpdates.clear();
       }
@@ -124,7 +102,7 @@ final class DocumentsWriterPerThread implements Accountable {
   private final static boolean INFO_VERBOSE = false;
   final Codec codec;
   final TrackingDirectoryWrapper directory;
-  private final DocConsumer consumer;
+  private final IndexingChain indexingChain;
 
   // Updates for our still-in-RAM (to be flushed next) segment
   private final BufferedUpdates pendingUpdates;
@@ -167,7 +145,7 @@ final class DocumentsWriterPerThread implements Accountable {
       infoStream.message("DWPT", Thread.currentThread().getName() + " init seg=" + segmentName + " delQueue=" + deleteQueue);  
     }
     this.enableTestPoints = enableTestPoints;
-    consumer = indexWriterConfig.getIndexingChain().getChain(indexVersionCreated, segmentInfo, this.directory, fieldInfos, indexWriterConfig, this::onAbortingException);
+    indexingChain = new IndexingChain(indexVersionCreated, segmentInfo, this.directory, fieldInfos, indexWriterConfig, this::onAbortingException);
   }
   
   final void testPoint(String message) {
@@ -205,7 +183,7 @@ final class DocumentsWriterPerThread implements Accountable {
           // it's very hard to fix (we can't easily distinguish aborting
           // vs non-aborting exceptions):
           reserveOneDoc();
-          consumer.processDocument(numDocsInRAM++, doc);
+          indexingChain.processDocument(numDocsInRAM++, doc);
         }
         allDocsIndexed = true;
         return finishDocuments(deleteNode, docsInRamBefore);
@@ -343,11 +321,11 @@ final class DocumentsWriterPerThread implements Accountable {
     try {
       DocIdSetIterator softDeletedDocs;
       if (indexWriterConfig.getSoftDeletesField() != null) {
-        softDeletedDocs = consumer.getHasDocValues(indexWriterConfig.getSoftDeletesField());
+        softDeletedDocs = indexingChain.getHasDocValues(indexWriterConfig.getSoftDeletesField());
       } else {
         softDeletedDocs = null;
       }
-      sortMap = consumer.flush(flushState);
+      sortMap = indexingChain.flush(flushState);
       if (softDeletedDocs == null) {
         flushState.softDelCountOnFlush = 0;
       } else {
@@ -518,12 +496,12 @@ final class DocumentsWriterPerThread implements Accountable {
 
   @Override
   public long ramBytesUsed() {
-    return (deleteDocIDs.length  * Integer.BYTES)+ pendingUpdates.ramBytesUsed() + consumer.ramBytesUsed();
+    return (deleteDocIDs.length  * Integer.BYTES)+ pendingUpdates.ramBytesUsed() + indexingChain.ramBytesUsed();
   }
 
   @Override
   public Collection<Accountable> getChildResources() {
-    return List.of(pendingUpdates, consumer);
+    return List.of(pendingUpdates, indexingChain);
   }
 
   @Override
diff --git a/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java b/lucene/core/src/java/org/apache/lucene/index/IndexingChain.java
similarity index 98%
rename from lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
rename to lucene/core/src/java/org/apache/lucene/index/IndexingChain.java
index 56406ee..f40303b 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexingChain.java
@@ -57,8 +57,7 @@ import org.apache.lucene.util.RamUsageEstimator;
 
 /** Default general purpose indexing chain, which handles
  *  indexing all types of fields. */
-final class DefaultIndexingChain extends DocConsumer {
-
+final class IndexingChain implements Accountable {
 
   final Counter bytesUsed = Counter.newCounter();
   final FieldInfos.Builder fieldInfos;
@@ -88,8 +87,8 @@ final class DefaultIndexingChain extends DocConsumer {
   private final Consumer<Throwable> abortingExceptionConsumer;
   private boolean hasHitAbortingException;
 
-  DefaultIndexingChain(int indexCreatedVersionMajor, SegmentInfo segmentInfo, Directory directory, FieldInfos.Builder fieldInfos, LiveIndexWriterConfig indexWriterConfig,
-                       Consumer<Throwable> abortingExceptionConsumer) {
+  IndexingChain(int indexCreatedVersionMajor, SegmentInfo segmentInfo, Directory directory, FieldInfos.Builder fieldInfos, LiveIndexWriterConfig indexWriterConfig,
+                Consumer<Throwable> abortingExceptionConsumer) {
     this.indexCreatedVersionMajor = indexCreatedVersionMajor;
     byteBlockAllocator = new ByteBlockPool.DirectTrackingAllocator(bytesUsed);
     IntBlockPool.Allocator intBlockAllocator = new IntBlockAllocator(bytesUsed);
@@ -207,8 +206,7 @@ final class DefaultIndexingChain extends DocConsumer {
     return sorter.sort(state.segmentInfo.maxDoc(), comparators.toArray(IndexSorter.DocComparator[]::new));
   }
 
-  @Override
-  public Sorter.DocMap flush(SegmentWriteState state) throws IOException {
+  Sorter.DocMap flush(SegmentWriteState state) throws IOException {
 
     // NOTE: caller (DocumentsWriterPerThread) handles
     // aborting on any exception from this method
@@ -408,9 +406,8 @@ final class DefaultIndexingChain extends DocConsumer {
     }
   }
 
-  @Override
   @SuppressWarnings("try")
-  public void abort() throws IOException{
+  void abort() throws IOException{
     // finalizer will e.g. close any open files in the term vectors writer:
     try (Closeable finalizer = termsHash::abort){
       storedFieldsConsumer.abort();
@@ -464,8 +461,7 @@ final class DefaultIndexingChain extends DocConsumer {
     }
   }
 
-  @Override
-  public void processDocument(int docID, Iterable<? extends IndexableField> document) throws IOException {
+  void processDocument(int docID, Iterable<? extends IndexableField> document) throws IOException {
 
     // How many indexed field names we've seen (collapses
     // multiple field instances by the same name):
@@ -1008,7 +1004,6 @@ final class DefaultIndexingChain extends DocConsumer {
     }
   }
 
-  @Override
   DocIdSetIterator getHasDocValues(String field) {
     PerField perField = getPerField(field);
     if (perField != null) {
diff --git a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java
index f979984..756be0c 100644
--- a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java
+++ b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java
@@ -22,7 +22,6 @@ import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
 import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.search.IndexSearcher;
@@ -67,10 +66,6 @@ public class LiveIndexWriterConfig {
   /** {@link MergeScheduler} to use for running merges. */
   protected volatile MergeScheduler mergeScheduler;
 
-  /** {@link IndexingChain} that determines how documents are
-   *  indexed. */
-  protected volatile IndexingChain indexingChain;
-
   /** {@link Codec} used to write new segments. */
   protected volatile Codec codec;
 
@@ -124,7 +119,6 @@ public class LiveIndexWriterConfig {
     openMode = OpenMode.CREATE_OR_APPEND;
     similarity = IndexSearcher.getDefaultSimilarity();
     mergeScheduler = new ConcurrentMergeScheduler();
-    indexingChain = DocumentsWriterPerThread.defaultIndexingChain;
     codec = Codec.getDefault();
     if (codec == null) {
       throw new NullPointerException();
@@ -354,13 +348,6 @@ public class LiveIndexWriterConfig {
   }
 
   /**
-   * Returns the indexing chain.
-   */
-  IndexingChain getIndexingChain() {
-    return indexingChain;
-  }
-
-  /**
    * Returns the max amount of memory each {@link DocumentsWriterPerThread} can
    * consume until forcefully flushed.
    * 
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
index f428b94..fc085ae 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
@@ -3385,7 +3385,7 @@ public class TestIndexWriter extends LuceneTestCase {
     try (Directory dir = new FilterDirectory(newDirectory()) {
       @Override
       public IndexOutput createOutput(String name, IOContext context) throws IOException {
-        if (callStackContains(DefaultIndexingChain.class, "flush")) {
+        if (callStackContains(IndexingChain.class, "flush")) {
           try {
             inFlush.countDown();
             latch.await();
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterConfig.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterConfig.java
index a1cef6c..56db27e 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterConfig.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterConfig.java
@@ -22,13 +22,11 @@ import java.lang.reflect.Method;
 import java.lang.reflect.Modifier;
 import java.util.HashSet;
 import java.util.Set;
-import java.util.function.Consumer;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.similarities.ClassicSimilarity;
@@ -43,17 +41,6 @@ public class TestIndexWriterConfig extends LuceneTestCase {
     // Does not implement anything - used only for type checking on IndexWriterConfig.
   }
 
-  private static final class MyIndexingChain extends IndexingChain {
-    @Override
-    DocConsumer getChain(int indexCreatedVersionMajor, SegmentInfo segmentInfo, Directory directory,
-                         FieldInfos.Builder fieldInfos, LiveIndexWriterConfig indexWriterConfig,
-                         Consumer<Throwable> abortingExceptionConsumer) {
-      return null;
-    }
-    // Does not implement anything - used only for type checking on IndexWriterConfig.
-
-  }
-
   @Test
   public void testDefaults() throws Exception {
     IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random()));
@@ -67,7 +54,6 @@ public class TestIndexWriterConfig extends LuceneTestCase {
     assertEquals(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB, conf.getRAMBufferSizeMB(), 0.0);
     assertEquals(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS, conf.getMaxBufferedDocs());
     assertEquals(IndexWriterConfig.DEFAULT_READER_POOLING, conf.getReaderPooling());
-    assertTrue(DocumentsWriterPerThread.defaultIndexingChain == conf.getIndexingChain());
     assertNull(conf.getMergedSegmentWarmer());
     assertEquals(TieredMergePolicy.class, conf.getMergePolicy().getClass());
     assertEquals(FlushByRamOrCountsPolicy.class, conf.getFlushPolicy().getClass());
@@ -232,9 +218,6 @@ public class TestIndexWriterConfig extends LuceneTestCase {
       conf.setSimilarity(null);
     });
 
-    // Test IndexingChain
-    assertTrue(DocumentsWriterPerThread.defaultIndexingChain == conf.getIndexingChain());
-
     expectThrows(IllegalArgumentException.class, () -> {
       conf.setMaxBufferedDocs(1);
     });
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java
index c4f379e..347f744 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java
@@ -453,7 +453,7 @@ public class TestIndexWriterWithThreads extends LuceneTestCase {
     @Override
     public void eval(MockDirectoryWrapper dir)  throws IOException {
       if (doFail) {
-        if (callStackContains(DefaultIndexingChain.class, "flush")) {
+        if (callStackContains(IndexingChain.class, "flush")) {
           if (onlyOnce)
             doFail = false;
           //System.out.println(Thread.currentThread().getName() + ": NOW FAIL: onlyOnce=" + onlyOnce);