Posted to commits@lucene.apache.org by so...@apache.org on 2020/06/03 19:18:22 UTC
[lucene-solr] 02/02: Fix case where mergeOnCommit would attempt to delete files twice in the presence of deletions
This is an automated email from the ASF dual-hosted git repository.
sokolov pushed a commit to branch jira/lucene-8962
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
commit cab5ef5e6f2bdcda59fd669a298ec1377777af9d
Author: Michael Sokolov <so...@amazon.com>
AuthorDate: Wed Jun 3 15:12:02 2020 -0400
Fix case where mergeOnCommit would attempt to delete files twice in the presence of deletions
---
.../java/org/apache/lucene/index/IndexWriter.java | 3 +-
.../org/apache/lucene/index/TestIndexWriter.java | 141 +++++++++++++--------
2 files changed, 91 insertions(+), 53 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index 88fdb90..13e0443 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -3169,13 +3169,13 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable,
// Resolve "live" SegmentInfos segments to their toCommit cloned equivalents, based on segment name.
Set<String> mergedSegmentNames = new HashSet<>();
for (SegmentCommitInfo sci : this.segments) {
- deleter.decRef(sci.files());
mergedSegmentNames.add(sci.info.name);
}
List<SegmentCommitInfo> toCommitMergedAwaySegments = new ArrayList<>();
for (SegmentCommitInfo sci : toCommit) {
if (mergedSegmentNames.contains(sci.info.name)) {
toCommitMergedAwaySegments.add(sci);
+ deleter.decRef(sci.files());
}
}
// Construct a OneMerge that applies to toCommit
@@ -4593,6 +4593,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable,
// Merge would produce a 0-doc segment, so we do nothing except commit the merge to remove all the 0-doc segments that we "merged":
assert merge.info.info.maxDoc() == 0;
commitMerge(merge, mergeState);
+ success = true;
return 0;
}
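
Taken together, the two IndexWriter.java hunks above are the fix: the decRef is now issued against the toCommit clones of the merged-away segments (and only for segments that actually appear in the commit point being prepared) instead of against every segment in the merge's live view, and the zero-doc merge path marks success = true before returning so the method's failure-handling cleanup does not run after the merge has already been committed. As a rough illustration of why a duplicate decRef is dangerous when index files are reference counted, here is a minimal, self-contained sketch; ToyRefCountingDeleter and the "_0.cfs" file name are made up for the example, and this is not Lucene's IndexFileDeleter API:

import java.util.HashMap;
import java.util.Map;

// Toy illustration only: a minimal reference-counting deleter that removes a
// file as soon as its count reaches zero. Lucene's IndexFileDeleter is more
// involved, but the double-release hazard is the same.
class ToyRefCountingDeleter {
  private final Map<String, Integer> refCounts = new HashMap<>();

  void incRef(String file) {
    refCounts.merge(file, 1, Integer::sum);
  }

  void decRef(String file) {
    Integer count = refCounts.get(file);
    if (count == null) {
      throw new IllegalStateException("decRef on untracked file: " + file);
    }
    if (count == 1) {
      refCounts.remove(file);
      System.out.println("deleting " + file); // gone, even if a commit point still needs it
    } else {
      refCounts.put(file, count - 1);
    }
  }

  public static void main(String[] args) {
    ToyRefCountingDeleter deleter = new ToyRefCountingDeleter();
    deleter.incRef("_0.cfs"); // reference held by the live SegmentInfos
    deleter.incRef("_0.cfs"); // reference held by the in-flight commit point
    deleter.decRef("_0.cfs"); // the live view releases its reference; count is now 1
    deleter.decRef("_0.cfs"); // a duplicate release on behalf of the same holder drops the
                              // count to zero and deletes the file while the commit point
                              // still expects it on disk
  }
}

With reference counting like this, every holder must release each file exactly once; a second release for the same holder deletes the file out from under anyone still counting on it, which is the failure mode the commit message describes.
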
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
index 8fb1ce5..7590b1a 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
@@ -344,7 +344,7 @@ public class TestIndexWriter extends LuceneTestCase {
// Make sure it's OK to change RAM buffer size and
// maxBufferedDocs in a write session
public void testChangingRAMBuffer() throws IOException {
- Directory dir = newDirectory();
+ Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
writer.getConfig().setMaxBufferedDocs(10);
writer.getConfig().setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
@@ -607,7 +607,7 @@ public class TestIndexWriter extends LuceneTestCase {
doc.add(newField("content4", contents, customType));
type = customType;
} else
- type = TextField.TYPE_NOT_STORED;
+ type = TextField.TYPE_NOT_STORED;
doc.add(newTextField("content1", contents, Field.Store.NO));
doc.add(newField("content3", "", customType));
doc.add(newField("content5", "", type));
@@ -663,13 +663,13 @@ public class TestIndexWriter extends LuceneTestCase {
writer.close();
dir.close();
}
-
+
public void testEmptyFieldNameTerms() throws IOException {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
Document doc = new Document();
doc.add(newTextField("", "a b c", Field.Store.NO));
- writer.addDocument(doc);
+ writer.addDocument(doc);
writer.close();
DirectoryReader reader = DirectoryReader.open(dir);
LeafReader subreader = getOnlyLeafReader(reader);
@@ -681,7 +681,7 @@ public class TestIndexWriter extends LuceneTestCase {
reader.close();
dir.close();
}
-
+
public void testEmptyFieldNameWithEmptyTerm() throws IOException {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
@@ -690,7 +690,7 @@ public class TestIndexWriter extends LuceneTestCase {
doc.add(newStringField("", "a", Field.Store.NO));
doc.add(newStringField("", "b", Field.Store.NO));
doc.add(newStringField("", "c", Field.Store.NO));
- writer.addDocument(doc);
+ writer.addDocument(doc);
writer.close();
DirectoryReader reader = DirectoryReader.open(dir);
LeafReader subreader = getOnlyLeafReader(reader);
@@ -834,7 +834,7 @@ public class TestIndexWriter extends LuceneTestCase {
customType.setStoreTermVectors(true);
customType.setStoreTermVectorPositions(true);
customType.setStoreTermVectorOffsets(true);
-
+
doc.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType));
writer.addDocument(doc);
writer.addDocument(doc);
@@ -922,7 +922,7 @@ public class TestIndexWriter extends LuceneTestCase {
// open/close slowly sometimes
dir.setUseSlowOpenClosers(true);
-
+
// throttle a little
dir.setThrottling(MockDirectoryWrapper.Throttling.SOMETIMES);
@@ -1148,7 +1148,7 @@ public class TestIndexWriter extends LuceneTestCase {
FieldType customType = new FieldType(StoredField.TYPE);
customType.setTokenized(true);
-
+
Field f = new Field("binary", b, 10, 17, customType);
// TODO: this is evil, changing the type after creating the field:
customType.setIndexOptions(IndexOptions.DOCS);
@@ -1157,7 +1157,7 @@ public class TestIndexWriter extends LuceneTestCase {
f.setTokenStream(doc1field1);
FieldType customType2 = new FieldType(TextField.TYPE_STORED);
-
+
Field f2 = newField("string", "value", customType2);
final MockTokenizer doc1field2 = new MockTokenizer(MockTokenizer.WHITESPACE, false);
doc1field2.setReader(new StringReader("doc1field2"));
@@ -1233,7 +1233,7 @@ public class TestIndexWriter extends LuceneTestCase {
public void testDeleteUnusedFiles() throws Exception {
assumeFalse("test relies on exact filenames", Codec.getDefault() instanceof SimpleTextCodec);
assumeWorkingMMapOnWindows();
-
+
for(int iter=0;iter<2;iter++) {
// relies on windows semantics
Path path = createTempDir();
@@ -1250,7 +1250,7 @@ public class TestIndexWriter extends LuceneTestCase {
}
MergePolicy mergePolicy = newLogMergePolicy(true);
-
+
// This test expects all of its segments to be in CFS
mergePolicy.setNoCFSRatio(1.0);
mergePolicy.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
@@ -1338,7 +1338,7 @@ public class TestIndexWriter extends LuceneTestCase {
customType.setStoreTermVectors(true);
customType.setStoreTermVectorPositions(true);
customType.setStoreTermVectorOffsets(true);
-
+
doc.add(newField("c", "val", customType));
writer.addDocument(doc);
writer.commit();
@@ -1379,7 +1379,7 @@ public class TestIndexWriter extends LuceneTestCase {
// indexed, flushed (but not committed) and then IW rolls back, then no
// files are left in the Directory.
Directory dir = newDirectory();
-
+
String[] origFiles = dir.listAll();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
.setMaxBufferedDocs(2)
@@ -1409,8 +1409,8 @@ public class TestIndexWriter extends LuceneTestCase {
// Adding just one document does not call flush yet.
int computedExtraFileCount = 0;
for (String file : dir.listAll()) {
- if (IndexWriter.WRITE_LOCK_NAME.equals(file) ||
- file.startsWith(IndexFileNames.SEGMENTS) ||
+ if (IndexWriter.WRITE_LOCK_NAME.equals(file) ||
+ file.startsWith(IndexFileNames.SEGMENTS) ||
IndexFileNames.CODEC_FILE_PATTERN.matcher(file).matches()) {
if (file.lastIndexOf('.') < 0
// don't count stored fields and term vectors in, or any temporary files they might
@@ -1458,7 +1458,7 @@ public class TestIndexWriter extends LuceneTestCase {
FieldType customType3 = new FieldType(TextField.TYPE_STORED);
customType3.setTokenized(false);
customType3.setOmitNorms(true);
-
+
for (int i=0; i<2; i++) {
Document doc = new Document();
doc.add(new Field("id", Integer.toString(i)+BIG, customType3));
@@ -1478,7 +1478,7 @@ public class TestIndexWriter extends LuceneTestCase {
SegmentReader sr = (SegmentReader) ctx.reader();
assertFalse(sr.getFieldInfos().hasVectors());
}
-
+
r0.close();
dir.close();
}
@@ -1501,7 +1501,7 @@ public class TestIndexWriter extends LuceneTestCase {
@Override
public final boolean incrementToken() {
- clearAttributes();
+ clearAttributes();
if (upto < tokens.length) {
termAtt.setEmpty();
termAtt.append(tokens[upto]);
@@ -1724,7 +1724,7 @@ public class TestIndexWriter extends LuceneTestCase {
r.close();
dir.close();
}
-
+
public void testDontInvokeAnalyzerForUnAnalyzedFields() throws Exception {
Analyzer analyzer = new Analyzer() {
@Override
@@ -1759,13 +1759,13 @@ public class TestIndexWriter extends LuceneTestCase {
w.close();
dir.close();
}
-
+
//LUCENE-1468 -- make sure opening an IndexWriter with
// create=true does not remove non-index files
-
+
public void testOtherFiles() throws Throwable {
Directory dir = newDirectory();
- IndexWriter iw = new IndexWriter(dir,
+ IndexWriter iw = new IndexWriter(dir,
newIndexWriterConfig(new MockAnalyzer(random())));
iw.addDocument(new Document());
iw.close();
@@ -1774,15 +1774,15 @@ public class TestIndexWriter extends LuceneTestCase {
IndexOutput out = dir.createOutput("myrandomfile", newIOContext(random()));
out.writeByte((byte) 42);
out.close();
-
+
new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))).close();
-
+
assertTrue(slowFileExists(dir, "myrandomfile"));
} finally {
dir.close();
}
}
-
+
// LUCENE-3849
public void testStopwordsPosIncHole() throws Exception {
Directory dir = newDirectory();
@@ -1811,7 +1811,7 @@ public class TestIndexWriter extends LuceneTestCase {
ir.close();
dir.close();
}
-
+
// LUCENE-3849
public void testStopwordsPosIncHole2() throws Exception {
// use two stopfilters for testing here
@@ -1843,23 +1843,23 @@ public class TestIndexWriter extends LuceneTestCase {
ir.close();
dir.close();
}
-
+
// LUCENE-4575
public void testCommitWithUserDataOnly() throws Exception {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(null));
writer.commit(); // first commit to complete IW create transaction.
-
+
// this should store the commit data, even though no other changes were made
writer.setLiveCommitData(new HashMap<String,String>() {{
put("key", "value");
}}.entrySet());
writer.commit();
-
+
DirectoryReader r = DirectoryReader.open(dir);
assertEquals("value", r.getIndexCommit().getUserData().get("key"));
r.close();
-
+
// now check setCommitData and prepareCommit/commit sequence
writer.setLiveCommitData(new HashMap<String,String>() {{
put("key", "value1");
@@ -1873,7 +1873,7 @@ public class TestIndexWriter extends LuceneTestCase {
r = DirectoryReader.open(dir);
assertEquals("value1", r.getIndexCommit().getUserData().get("key"));
r.close();
-
+
// now should commit the second commitData - there was a bug where
// IndexWriter.finishCommit overrode the second commitData
writer.commit();
@@ -1881,7 +1881,7 @@ public class TestIndexWriter extends LuceneTestCase {
assertEquals("IndexWriter.finishCommit may have overridden the second commitData",
"value2", r.getIndexCommit().getUserData().get("key"));
r.close();
-
+
writer.close();
dir.close();
}
@@ -1896,7 +1896,7 @@ public class TestIndexWriter extends LuceneTestCase {
}
return data;
}
-
+
@Test
public void testGetCommitData() throws Exception {
Directory dir = newDirectory();
@@ -1906,16 +1906,16 @@ public class TestIndexWriter extends LuceneTestCase {
}}.entrySet());
assertEquals("value", getLiveCommitData(writer).get("key"));
writer.close();
-
+
// validate that it's also visible when opening a new IndexWriter
writer = new IndexWriter(dir, newIndexWriterConfig(null)
.setOpenMode(OpenMode.APPEND));
assertEquals("value", getLiveCommitData(writer).get("key"));
writer.close();
-
+
dir.close();
}
-
+
public void testNullAnalyzer() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwConf = newIndexWriterConfig(null);
@@ -1942,7 +1942,7 @@ public class TestIndexWriter extends LuceneTestCase {
iw.close();
dir.close();
}
-
+
public void testNullDocument() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
@@ -1967,7 +1967,7 @@ public class TestIndexWriter extends LuceneTestCase {
iw.close();
dir.close();
}
-
+
public void testNullDocuments() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
@@ -1992,7 +1992,7 @@ public class TestIndexWriter extends LuceneTestCase {
iw.close();
dir.close();
}
-
+
public void testIterableFieldThrowsException() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
@@ -2000,7 +2000,7 @@ public class TestIndexWriter extends LuceneTestCase {
int docCount = 0;
int docId = 0;
Set<String> liveIds = new HashSet<>();
- for (int i = 0; i < iters; i++) {
+ for (int i = 0; i < iters; i++) {
int numDocs = atLeast(4);
for (int j = 0; j < numDocs; j++) {
String id = Integer.toString(docId++);
@@ -2008,7 +2008,7 @@ public class TestIndexWriter extends LuceneTestCase {
fields.add(new StringField("id", id, Field.Store.YES));
fields.add(new StringField("foo", TestUtil.randomSimpleString(random()), Field.Store.NO));
docId++;
-
+
boolean success = false;
try {
w.addDocument(new RandomFailingIterable<IndexableField>(fields, random()));
@@ -2040,7 +2040,7 @@ public class TestIndexWriter extends LuceneTestCase {
w.close();
IOUtils.close(reader, dir);
}
-
+
public void testIterableThrowsException() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
@@ -2088,7 +2088,7 @@ public class TestIndexWriter extends LuceneTestCase {
w.close();
IOUtils.close(reader, dir);
}
-
+
public void testIterableThrowsException2() throws IOException {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
@@ -2128,7 +2128,7 @@ public class TestIndexWriter extends LuceneTestCase {
this.list = list;
this.failOn = random.nextInt(5);
}
-
+
@Override
public Iterator<T> iterator() {
final Iterator<? extends T> docIter = list.iterator();
@@ -2254,7 +2254,7 @@ public class TestIndexWriter extends LuceneTestCase {
writer.close();
dir.close();
}
-
+
public void testMergeAllDeleted() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
@@ -2477,12 +2477,12 @@ public class TestIndexWriter extends LuceneTestCase {
IndexWriter w = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random())));
w.addDocument(new Document());
w.close();
-
+
SegmentInfos sis = SegmentInfos.readLatestCommit(d);
byte[] id1 = sis.getId();
assertNotNull(id1);
assertEquals(StringHelper.ID_LENGTH, id1.length);
-
+
byte[] id2 = sis.info(0).info.getId();
byte[] sciId2 = sis.info(0).getId();
assertNotNull(id2);
@@ -2514,7 +2514,7 @@ public class TestIndexWriter extends LuceneTestCase {
ids.add(id);
}
}
-
+
public void testEmptyNorm() throws Exception {
Directory d = newDirectory();
IndexWriter w = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random())));
@@ -2579,7 +2579,7 @@ public class TestIndexWriter extends LuceneTestCase {
assertEquals(1, r2.getIndexCommit().getGeneration());
assertEquals("segments_1", r2.getIndexCommit().getSegmentsFileName());
r2.close();
-
+
// make a change and another commit
w.addDocument(new Document());
w.commit();
@@ -2866,7 +2866,7 @@ public class TestIndexWriter extends LuceneTestCase {
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
IndexWriter w = new IndexWriter(dir, iwc);
w.close();
-
+
IndexOutput out = dir.createTempOutput("_0", "bkd", IOContext.DEFAULT);
String tempName = out.getName();
out.close();
@@ -3151,7 +3151,7 @@ public class TestIndexWriter extends LuceneTestCase {
expectThrows(IllegalArgumentException.class, () -> {
writer.softUpdateDocument(null, new Document(), new NumericDocValuesField("soft_delete", 1));
});
-
+
expectThrows(IllegalArgumentException.class, () -> {
writer.softUpdateDocument(new Term("id", "1"), new Document());
});
@@ -4167,4 +4167,41 @@ public class TestIndexWriter extends LuceneTestCase {
}
}
}
+
+ public void testMergeOnCommitKeepFullyDeletedSegments() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig();
+ iwc.mergePolicy = new FilterMergePolicy(newMergePolicy()) {
+ @Override
+ public boolean keepFullyDeletedSegment(IOSupplier<CodecReader> readerIOSupplier) {
+ return true;
+ }
+
+ @Override
+ public MergeSpecification findFullFlushMerges(MergeTrigger mergeTrigger,
+ SegmentInfos segmentInfos,
+ MergeContext mergeContext) {
+ List<SegmentCommitInfo> fullyDeletedSegments = segmentInfos.asList().stream()
+ .filter(s -> s.info.maxDoc() - s.getDelCount() == 0)
+ .collect(Collectors.toList());
+ if (fullyDeletedSegments.isEmpty()) {
+ return null;
+ }
+ MergeSpecification spec = new MergeSpecification();
+ spec.add(new OneMerge(fullyDeletedSegments));
+ return spec;
+ }
+ };
+ IndexWriter w = new IndexWriter(dir, iwc);
+ Document d = new Document();
+ d.add(new StringField("id", "1", Field.Store.YES));
+ w.addDocument(d);
+ w.commit();
+ w.updateDocument(new Term("id", "1"), d);
+ w.commit();
+ try (DirectoryReader reader = w.getReader()) {
+ assertEquals(1, reader.numDocs());
+ }
+ IOUtils.close(w, dir);
+ }
}
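
The new testMergeOnCommitKeepFullyDeletedSegments test sets up the scenario the commit message describes: its merge policy keeps fully deleted segments (keepFullyDeletedSegment returns true) and, at full-flush time, schedules any segment whose documents are all deleted for a merge. Updating the single indexed document and committing again leaves the first segment fully deleted, so the commit triggers a merge that produces a zero-doc segment, the path touched by the second IndexWriter.java hunk. The assertion only checks that one live document remains visible; the point of the test, per the commit message, is that this sequence no longer attempts to delete the same files twice.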