You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2020/12/19 15:12:05 UTC
[lucene-solr] branch branch_8x updated (9a53155 -> 42cc099)
This is an automated email from the ASF dual-hosted git repository.
mikemccand pushed a change to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git.
from 9a53155 LUCENE-9635: BM25FQuery - Mask encoded norm long value in array lookup to avoid negative norms in long documents (#2138)
new ce130e3 LUCENE-9617: Reset lowestUnassignedFieldNumber in FieldNumbers.clear() (#2088)
new 505952e LUCENE-9617: move CHANGES.txt entry to right place (9.0 -> 8.8.0)
new 42cc099 LUCENE-9617: small 8.x specific fixes to test case sources
The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
lucene/CHANGES.txt | 3 ++
.../java/org/apache/lucene/index/FieldInfos.java | 1 +
.../org/apache/lucene/index/TestFieldInfos.java | 16 ++++++
.../apache/lucene/index/TestIndexWriterDelete.java | 57 ++++++++++++++++++++--
4 files changed, 72 insertions(+), 5 deletions(-)
[lucene-solr] 03/03: LUCENE-9617: small 8.x specific fixes to test
case sources
Posted by mi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
mikemccand pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
commit 42cc099895e145e6f844e2a25d2686cf215b0aaf
Author: Mike McCandless <mi...@apache.org>
AuthorDate: Sat Dec 19 10:11:24 2020 -0500
LUCENE-9617: small 8.x specific fixes to test case sources
---
lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java b/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java
index 735d1c3..9d27bc5 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java
@@ -192,18 +192,15 @@ public class TestFieldInfos extends LuceneTestCase {
FieldInfos.FieldNumbers fieldNumbers = new FieldInfos.FieldNumbers("softDeletes");
for (int i = 0; i < 10; i++) {
fieldNumbers.addOrGet("field" + i, -1, IndexOptions.NONE, DocValuesType.NONE,
- 0, 0, 0, 0,
- VectorValues.SearchStrategy.NONE, false);
+ 0, 0, 0, false);
}
int idx = fieldNumbers.addOrGet("EleventhField", -1, IndexOptions.NONE, DocValuesType.NONE,
- 0, 0, 0, 0,
- VectorValues.SearchStrategy.NONE, false);
+ 0, 0, 0, false);
assertEquals("Field numbers 0 through 9 were allocated", 10, idx);
fieldNumbers.clear();
idx = fieldNumbers.addOrGet("PostClearField", -1, IndexOptions.NONE, DocValuesType.NONE,
- 0, 0, 0, 0,
- VectorValues.SearchStrategy.NONE, false);
+ 0, 0, 0, false);
assertEquals("Field numbers should reset after clear()", 0, idx);
}
}
[lucene-solr] 02/03: LUCENE-9617: move CHANGES.txt entry to right
place (9.0 -> 8.8.0)
Posted by mi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
mikemccand pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
commit 505952ea3fb4ddf4bd5fc67be96b191c72bc97c1
Author: Mike McCandless <mi...@apache.org>
AuthorDate: Fri Dec 18 16:45:11 2020 -0500
LUCENE-9617: move CHANGES.txt entry to right place (9.0 -> 8.8.0)
[lucene-solr] 01/03: LUCENE-9617: Reset lowestUnassignedFieldNumber
in FieldNumbers.clear() (#2088)
Posted by mi...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
mikemccand pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
commit ce130e3e00909e999e723c7ac662d164b51bc3a4
Author: msfroh <ms...@gmail.com>
AuthorDate: Fri Dec 18 13:30:33 2020 -0800
LUCENE-9617: Reset lowestUnassignedFieldNumber in FieldNumbers.clear() (#2088)
* LUCENE-9617: Reset lowestUnassignedFieldNumber in FieldNumbers.clear()
FieldNumbers.clear() is called from IndexWriter.deleteAll(), which is
supposed to completely reset the state of the index. This includes
clearing all known fields.
Prior to this change, it would allocate progressively higher field
numbers, which results in larger and larger arrays for
FieldInfos.byNumber, effectively "leaking" field numbers every time
deleteAll() is called.
Co-authored-by: Michael Froh <fr...@amazon.com>
---
lucene/CHANGES.txt | 3 ++
.../java/org/apache/lucene/index/FieldInfos.java | 1 +
.../org/apache/lucene/index/TestFieldInfos.java | 19 ++++++++
.../apache/lucene/index/TestIndexWriterDelete.java | 57 ++++++++++++++++++++--
4 files changed, 75 insertions(+), 5 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 6e39b2e..373af94 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -63,6 +63,9 @@ Bug Fixes
* LUCENE-9635: BM25FQuery - Mask encoded norm long value in array lookup.
(Yilun Cui)
+* LUCENE-9617: Fix per-field memory leak in IndexWriter.deleteAll(). Reset next available internal
+ field number to 0 on FieldInfos.clear(), to avoid wasting FieldInfo references. (Michael Froh)
+
Other
---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java
index 4a867df..eb4ea3a 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java
@@ -435,6 +435,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
indexOptions.clear();
docValuesType.clear();
dimensions.clear();
+ lowestUnassignedFieldNumber = -1;
}
synchronized void setIndexOptions(int number, String name, IndexOptions indexOptions) {
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java b/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java
index 321fcf7..735d1c3 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java
@@ -187,4 +187,23 @@ public class TestFieldInfos extends LuceneTestCase {
writer.close();
dir.close();
}
+
+ public void testFieldNumbersAutoIncrement() {
+ FieldInfos.FieldNumbers fieldNumbers = new FieldInfos.FieldNumbers("softDeletes");
+ for (int i = 0; i < 10; i++) {
+ fieldNumbers.addOrGet("field" + i, -1, IndexOptions.NONE, DocValuesType.NONE,
+ 0, 0, 0, 0,
+ VectorValues.SearchStrategy.NONE, false);
+ }
+ int idx = fieldNumbers.addOrGet("EleventhField", -1, IndexOptions.NONE, DocValuesType.NONE,
+ 0, 0, 0, 0,
+ VectorValues.SearchStrategy.NONE, false);
+ assertEquals("Field numbers 0 through 9 were allocated", 10, idx);
+
+ fieldNumbers.clear();
+ idx = fieldNumbers.addOrGet("PostClearField", -1, IndexOptions.NONE, DocValuesType.NONE,
+ 0, 0, 0, 0,
+ VectorValues.SearchStrategy.NONE, false);
+ assertEquals("Field numbers should reset after clear()", 0, idx);
+ }
}
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDelete.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDelete.java
index 06ca779..c00746c 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDelete.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterDelete.java
@@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
+import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@@ -28,6 +29,7 @@ import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
@@ -36,6 +38,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
@@ -355,17 +358,17 @@ public class TestIndexWriterDelete extends LuceneTestCase {
if (VERBOSE) {
System.out.println("\nTEST: now final deleteAll");
}
-
+
modifier.deleteAll();
for (Thread thread : threads) {
thread.join();
}
-
+
if (VERBOSE) {
System.out.println("\nTEST: now close");
}
modifier.close();
-
+
DirectoryReader reader = DirectoryReader.open(dir);
if (VERBOSE) {
System.out.println("\nTEST: got reader=" + reader);
@@ -453,6 +456,50 @@ public class TestIndexWriterDelete extends LuceneTestCase {
dir.close();
}
+ // Verify that we can call deleteAll repeatedly without leaking field numbers such that we trigger OOME
+ // on creation of FieldInfos. See https://issues.apache.org/jira/browse/LUCENE-9617
+ @Nightly // Takes 1-2 minutes to run on a 16-core machine
+ public void testDeleteAllRepeated() throws IOException, InterruptedException {
+ final int breakingFieldCount = 50_000_000;
+ try (Directory dir = newDirectory()) {
+ // Avoid flushing until the end of the test to save time.
+ IndexWriterConfig conf = newIndexWriterConfig()
+ .setMaxBufferedDocs(1000)
+ .setRAMBufferSizeMB(1000)
+ .setRAMPerThreadHardLimitMB(1000)
+ .setCheckPendingFlushUpdate(false);
+ try (IndexWriter modifier = new IndexWriter(dir, conf)) {
+ Document document = new Document();
+ int fieldsPerDoc = 1_000;
+ for (int i = 0; i < fieldsPerDoc; i++) {
+ document.add(new StoredField("field" + i, ""));
+ }
+ AtomicLong numFields = new AtomicLong(0);
+ List<Thread> threads = new ArrayList<>();
+ int nThreads = atLeast(8);
+ for (int i = 0; i < nThreads; i++) {
+ Thread t = new Thread(() -> {
+ try {
+ while (numFields.getAndAdd(fieldsPerDoc) < breakingFieldCount) {
+ modifier.addDocument(document);
+ modifier.deleteAll();
+ }
+ } catch (IOException e) {
+ throw new UncheckedIOException(e);
+ }
+ });
+ t.start();
+ threads.add(t);
+ }
+ for (Thread t : threads) {
+ t.join();
+ }
+ // Add one last document and flush to build FieldInfos.
+ modifier.addDocument(document);
+ modifier.flush();
+ }
+ }
+ }
private void updateDoc(IndexWriter modifier, int id, int value)
throws IOException {
@@ -944,7 +991,7 @@ public class TestIndexWriterDelete extends LuceneTestCase {
modifier.close();
dir.close();
}
-
+
public void testDeleteAllSlowly() throws Exception {
final Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
@@ -982,7 +1029,7 @@ public class TestIndexWriterDelete extends LuceneTestCase {
w.close();
dir.close();
}
-
+
// TODO: this test can hit pathological cases (IW settings?) where it runs for far too long
@Nightly
public void testIndexingThenDeleting() throws Exception {