You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/08/14 18:43:23 UTC
svn commit: r1513955 - in /lucene/dev/trunk/lucene: ./
core/src/java/org/apache/lucene/index/ core/src/test/org/apache/lucene/index/
test-framework/src/java/org/apache/lucene/index/
test-framework/src/java/org/apache/lucene/util/
Author: rmuir
Date: Wed Aug 14 16:43:22 2013
New Revision: 1513955
URL: http://svn.apache.org/r1513955
Log:
LUCENE-5173: add checkindex piece of LUCENE-5116, prevent 0 document segments completely
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1513955&r1=1513954&r2=1513955&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Wed Aug 14 16:43:22 2013
@@ -172,6 +172,9 @@ API Changes
Analyzer. Analyzer adds a getter to retrieve the strategy for this use-case.
(Uwe Schindler, Robert Muir, Shay Banon)
+* LUCENE-5173: Lucene never writes segments with 0 documents anymore.
+ (Shai Erera, Uwe Schindler, Robert Muir)
+
Optimizations
* LUCENE-5088: Added TermFilter to filter docs by a specific term.
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1513955&r1=1513954&r2=1513955&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Wed Aug 14 16:43:22 2013
@@ -496,6 +496,11 @@ public class CheckIndex {
msg(infoStream, " " + (1+i) + " of " + numSegments + ": name=" + info.info.name + " docCount=" + info.info.getDocCount());
segInfoStat.name = info.info.name;
segInfoStat.docCount = info.info.getDocCount();
+
+ final String version = info.info.getVersion();
+ if (info.info.getDocCount() <= 0 && version != null && versionComparator.compare(version, "4.5") >= 0) {
+ throw new RuntimeException("illegal number of documents: maxDoc=" + info.info.getDocCount());
+ }
int toLoseDocCount = info.info.getDocCount();
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java?rev=1513955&r1=1513954&r2=1513955&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java Wed Aug 14 16:43:22 2013
@@ -2462,20 +2462,12 @@ public class IndexWriter implements Clos
String mergedName = newSegmentName();
final List<AtomicReader> mergeReaders = new ArrayList<AtomicReader>();
for (IndexReader indexReader : readers) {
- if (indexReader.numDocs() > 0) {
- numDocs += indexReader.numDocs();
- for (AtomicReaderContext ctx : indexReader.leaves()) {
- if (ctx.reader().numDocs() > 0) { // drop empty (or all deleted) segments
- mergeReaders.add(ctx.reader());
- }
- }
+ numDocs += indexReader.numDocs();
+ for (AtomicReaderContext ctx : indexReader.leaves()) {
+ mergeReaders.add(ctx.reader());
}
}
- if (mergeReaders.isEmpty()) { // no segments with documents to add
- return;
- }
-
final IOContext context = new IOContext(new MergeInfo(numDocs, -1, true, -1));
// TODO: somehow we should fix this merge so it's
@@ -2487,6 +2479,10 @@ public class IndexWriter implements Clos
SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir,
MergeState.CheckAbort.NONE, globalFieldNumberMap, context);
+
+ if (!merger.shouldMerge()) {
+ return;
+ }
MergeState mergeState;
boolean success = false;
@@ -3733,7 +3729,12 @@ public class IndexWriter implements Clos
MergeState mergeState;
boolean success3 = false;
try {
- mergeState = merger.merge();
+ if (!merger.shouldMerge()) {
+ // would result in a 0 document segment: nothing to merge!
+ mergeState = new MergeState(new ArrayList<AtomicReader>(), merge.info.info, infoStream, checkAbort);
+ } else {
+ mergeState = merger.merge();
+ }
success3 = true;
} finally {
if (!success3) {
@@ -3748,12 +3749,16 @@ public class IndexWriter implements Clos
// Record which codec was used to write the segment
if (infoStream.isEnabled("IW")) {
- infoStream.message("IW", "merge codec=" + codec + " docCount=" + merge.info.info.getDocCount() + "; merged segment has " +
+ if (merge.info.info.getDocCount() == 0) {
+ infoStream.message("IW", "merge away fully deleted segments");
+ } else {
+ infoStream.message("IW", "merge codec=" + codec + " docCount=" + merge.info.info.getDocCount() + "; merged segment has " +
(mergeState.fieldInfos.hasVectors() ? "vectors" : "no vectors") + "; " +
(mergeState.fieldInfos.hasNorms() ? "norms" : "no norms") + "; " +
(mergeState.fieldInfos.hasDocValues() ? "docValues" : "no docValues") + "; " +
(mergeState.fieldInfos.hasProx() ? "prox" : "no prox") + "; " +
(mergeState.fieldInfos.hasProx() ? "freqs" : "no freqs"));
+ }
}
// Very important to do this before opening the reader
@@ -3958,8 +3963,8 @@ public class IndexWriter implements Clos
/** Only for testing.
*
* @lucene.internal */
- void keepFullyDeletedSegments() {
- keepFullyDeletedSegments = true;
+ void setKeepFullyDeletedSegments(boolean v) {
+ keepFullyDeletedSegments = v;
}
boolean getKeepFullyDeletedSegments() {
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1513955&r1=1513954&r2=1513955&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java Wed Aug 14 16:43:22 2013
@@ -52,12 +52,18 @@ final class SegmentMerger {
// note, just like in codec apis Directory 'dir' is NOT the same as segmentInfo.dir!!
SegmentMerger(List<AtomicReader> readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir,
- MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context) {
+ MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context) throws IOException {
mergeState = new MergeState(readers, segmentInfo, infoStream, checkAbort);
directory = dir;
this.codec = segmentInfo.getCodec();
this.context = context;
this.fieldInfosBuilder = new FieldInfos.Builder(fieldNumbers);
+ mergeState.segmentInfo.setDocCount(setDocMaps());
+ }
+
+ /** True if any merging should happen */
+ boolean shouldMerge() {
+ return mergeState.segmentInfo.getDocCount() > 0;
}
/**
@@ -67,14 +73,15 @@ final class SegmentMerger {
* @throws IOException if there is a low-level IO error
*/
MergeState merge() throws IOException {
+ if (!shouldMerge()) {
+ throw new IllegalStateException("Merge would result in 0 document segment");
+ }
// NOTE: it's important to add calls to
// checkAbort.work(...) if you make any changes to this
// method that will spend alot of time. The frequency
// of this check impacts how long
// IndexWriter.close(false) takes to actually stop the
// threads.
-
- mergeState.segmentInfo.setDocCount(setDocMaps());
mergeFieldInfos();
setMatchingSegmentReaders();
long t0 = 0;
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1513955&r1=1513954&r2=1513955&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java Wed Aug 14 16:43:22 2013
@@ -49,6 +49,7 @@ import org.apache.lucene.index.IndexWrit
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.AlreadyClosedException;
@@ -68,6 +69,7 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.SetOnce;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.packed.PackedInts;
@@ -2188,4 +2190,32 @@ public class TestIndexWriter extends Luc
writer.close();
dir.close();
}
+
+ public void testMergeAllDeleted() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+ final SetOnce<IndexWriter> iwRef = new SetOnce<IndexWriter>();
+ iwc.setInfoStream(new RandomIndexWriter.TestPointInfoStream(iwc.getInfoStream(), new RandomIndexWriter.TestPoint() {
+ @Override
+ public void apply(String message) {
+ if ("startCommitMerge".equals(message)) {
+ iwRef.get().setKeepFullyDeletedSegments(false);
+ } else if ("startMergeInit".equals(message)) {
+ iwRef.get().setKeepFullyDeletedSegments(true);
+ }
+ }
+ }));
+ IndexWriter evilWriter = new IndexWriter(dir, iwc);
+ iwRef.set(evilWriter);
+ for (int i = 0; i < 1000; i++) {
+ addDoc(evilWriter);
+ if (random().nextInt(17) == 0) {
+ evilWriter.commit();
+ }
+ }
+ evilWriter.deleteDocuments(new MatchAllDocsQuery());
+ evilWriter.forceMerge(1);
+ evilWriter.close();
+ dir.close();
+ }
}
Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java?rev=1513955&r1=1513954&r2=1513955&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java Wed Aug 14 16:43:22 2013
@@ -340,7 +340,7 @@ public class RandomIndexWriter implement
w.forceMerge(maxSegmentCount);
}
- private static final class TestPointInfoStream extends InfoStream {
+ static final class TestPointInfoStream extends InfoStream {
private final InfoStream delegate;
private final TestPoint testPoint;
Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java?rev=1513955&r1=1513954&r2=1513955&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java Wed Aug 14 16:43:22 2013
@@ -793,9 +793,9 @@ public class _TestUtil {
try {
// Carefully invoke what is a package-private (test
// only, internal) method on IndexWriter:
- Method m = IndexWriter.class.getDeclaredMethod("keepFullyDeletedSegments");
+ Method m = IndexWriter.class.getDeclaredMethod("setKeepFullyDeletedSegments", boolean.class);
m.setAccessible(true);
- m.invoke(w);
+ m.invoke(w, Boolean.TRUE);
} catch (Exception e) {
// Should not happen?
throw new RuntimeException(e);