You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2020/12/18 21:34:23 UTC

[lucene-solr] branch master updated: LUCENE-9617: Reset lowestUnassignedFieldNumber in FieldNumbers.clear() (#2088)

This is an automated email from the ASF dual-hosted git repository.

mikemccand pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new 8e162e2  LUCENE-9617: Reset lowestUnassignedFieldNumber in FieldNumbers.clear() (#2088)
8e162e2 is described below

commit 8e162e26708631099f673a9da41804ef440fda73
Author: msfroh <ms...@gmail.com>
AuthorDate: Fri Dec 18 13:30:33 2020 -0800

    LUCENE-9617: Reset lowestUnassignedFieldNumber in FieldNumbers.clear() (#2088)
    
    * LUCENE-9617: Reset lowestUnassignedFieldNumber in FieldNumbers.clear()
    
    FieldNumbers.clear() is called from IndexWriter.deleteAll(), which is
    supposed to completely reset the state of the index. This includes
    clearing all known fields.
    
    Prior to this change, it would allocate progressively higher field
    numbers, which results in larger and  larger arrays for
    FieldInfos.byNumber, effectively "leaking" field numbers every time
    deleteAll() is called.
    
    Co-authored-by: Michael Froh <fr...@amazon.com>
---
 lucene/CHANGES.txt                                 |  3 ++
 .../java/org/apache/lucene/index/FieldInfos.java   |  1 +
 .../org/apache/lucene/index/TestFieldInfos.java    | 19 ++++++++
 .../apache/lucene/index/TestIndexWriterDelete.java | 57 ++++++++++++++++++++--
 4 files changed, 75 insertions(+), 5 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 4a5f7ff..00aaf86 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -189,6 +189,9 @@ Bug fixes
 * LUCENE-9365: FuzzyQuery was missing matches when prefix length was equal to the term length
   (Mark Harwood, Mike Drob)
 
+* LUCENE-9617: Fix per-field memory leak in IndexWriter.deleteAll(). Reset next available internal
+  field number to 0 on FieldInfos.clear(), to avoid wasting FieldInfo references. (Michael Froh)
+
 Other
 
 * LUCENE-9631: Properly override slice() on subclasses of OffsetRange. (Dawid Weiss)
diff --git a/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java
index 95755ff..9d081a2 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java
@@ -481,6 +481,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
       indexOptions.clear();
       docValuesType.clear();
       dimensions.clear();
+      lowestUnassignedFieldNumber = -1;
     }
 
     synchronized void setIndexOptions(int number, String name, IndexOptions indexOptions) {
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java b/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java
index 321fcf7..735d1c3 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java
@@ -187,4 +187,23 @@ public class TestFieldInfos extends LuceneTestCase {
     writer.close();
     dir.close();
   }
+
+  public void testFieldNumbersAutoIncrement() {
+    FieldInfos.FieldNumbers fieldNumbers = new FieldInfos.FieldNumbers("softDeletes");
+    for (int i = 0; i < 10; i++) {
+      fieldNumbers.addOrGet("field" + i, -1, IndexOptions.NONE, DocValuesType.NONE,
+          0, 0, 0, 0,
+          VectorValues.SearchStrategy.NONE, false);
+    }
+    int idx = fieldNumbers.addOrGet("EleventhField", -1, IndexOptions.NONE, DocValuesType.NONE,
+        0, 0, 0, 0,
+        VectorValues.SearchStrategy.NONE, false);
+    assertEquals("Field numbers 0 through 9 were allocated", 10, idx);
+
+    fieldNumbers.clear();
+    idx = fieldNumbers.addOrGet("PostClearField", -1, IndexOptions.NONE, DocValuesType.NONE,
+        0, 0, 0, 0,
+        VectorValues.SearchStrategy.NONE, false);
+    assertEquals("Field numbers should reset after clear()", 0, idx);
+  }
 }
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDelete.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDelete.java
index 1f9781a..69bf706 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDelete.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDelete.java
@@ -20,6 +20,7 @@ package org.apache.lucene.index;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.PrintStream;
+import java.io.UncheckedIOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
@@ -28,6 +29,7 @@ import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -36,6 +38,7 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.StoredField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.ScoreDoc;
@@ -355,17 +358,17 @@ public class TestIndexWriterDelete extends LuceneTestCase {
     if (VERBOSE) {
       System.out.println("\nTEST: now final deleteAll");
     }
-    
+
     modifier.deleteAll();
     for (Thread thread : threads) {
       thread.join();
     }
-    
+
     if (VERBOSE) {
       System.out.println("\nTEST: now close");
     }
     modifier.close();
-    
+
     DirectoryReader reader = DirectoryReader.open(dir);
     if (VERBOSE) {
       System.out.println("\nTEST: got reader=" + reader);
@@ -453,6 +456,50 @@ public class TestIndexWriterDelete extends LuceneTestCase {
     dir.close();
   }
 
+  // Verify that we can call deleteAll repeatedly without leaking field numbers such that we trigger OOME
+  // on creation of FieldInfos. See https://issues.apache.org/jira/browse/LUCENE-9617
+  @Nightly // Takes 1-2 minutes to run on a 16-core machine
+  public void testDeleteAllRepeated() throws IOException, InterruptedException {
+    final int breakingFieldCount = 50_000_000;
+    try  (Directory dir = newDirectory()) {
+      // Avoid flushing until the end of the test to save time.
+      IndexWriterConfig conf = newIndexWriterConfig()
+              .setMaxBufferedDocs(1000)
+              .setRAMBufferSizeMB(1000)
+              .setRAMPerThreadHardLimitMB(1000)
+              .setCheckPendingFlushUpdate(false);
+      try (IndexWriter modifier = new IndexWriter(dir, conf)) {
+        Document document = new Document();
+        int fieldsPerDoc = 1_000;
+        for (int i = 0; i < fieldsPerDoc; i++) {
+          document.add(new StoredField("field" + i, ""));
+        }
+        AtomicLong numFields = new AtomicLong(0);
+        List<Thread> threads = new ArrayList<>();
+        int nThreads = atLeast(8);
+        for (int i = 0; i < nThreads; i++) {
+          Thread t = new Thread(() -> {
+            try {
+              while (numFields.getAndAdd(fieldsPerDoc) < breakingFieldCount) {
+                modifier.addDocument(document);
+                modifier.deleteAll();
+              }
+            } catch (IOException e) {
+              throw new UncheckedIOException(e);
+            }
+          });
+          t.start();
+          threads.add(t);
+        }
+        for (Thread t : threads) {
+          t.join();
+        }
+        // Add one last document and flush to build FieldInfos.
+        modifier.addDocument(document);
+        modifier.flush();
+      }
+    }
+  }
 
   private void updateDoc(IndexWriter modifier, int id, int value)
       throws IOException {
@@ -944,7 +991,7 @@ public class TestIndexWriterDelete extends LuceneTestCase {
     modifier.close();
     dir.close();
   }
-  
+
   public void testDeleteAllSlowly() throws Exception {
     final Directory dir = newDirectory();
     RandomIndexWriter w = new RandomIndexWriter(random(), dir);
@@ -982,7 +1029,7 @@ public class TestIndexWriterDelete extends LuceneTestCase {
     w.close();
     dir.close();
   }
-  
+
   // TODO: this test can hit pathological cases (IW settings?) where it runs for far too long
   @Nightly
   public void testIndexingThenDeleting() throws Exception {