Posted to commits@lucene.apache.org by mi...@apache.org on 2011/01/29 20:48:58 UTC
svn commit: r1065095 [1/3] - in /lucene/dev/trunk/lucene:
contrib/instantiated/src/test/org/apache/lucene/store/instantiated/
contrib/misc/src/test/org/apache/lucene/index/
contrib/misc/src/test/org/apache/lucene/misc/
contrib/queries/src/test/org/apac...
Author: mikemccand
Date: Sat Jan 29 19:48:56 2011
New Revision: 1065095
URL: http://svn.apache.org/viewvc?rev=1065095&view=rev
Log:
LUCENE-1076: allow non-contiguous merges; improve handling of buffered deletes
Added:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java
- copied, changed from r1065077, lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentDeletes.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java
- copied, changed from r1065077, lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/MockRandomMergePolicy.java (with props)
Removed:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentDeletes.java
Modified:
lucene/dev/trunk/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java
lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java
lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java
lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java
lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestSearch.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterMerging.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterOnDiskFull.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestLazyBug.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestNRTThreads.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestNorms.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestOmitNorms.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestOmitTf.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPayloads.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentInfo.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentTermDocs.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/BaseTestRangeFilter.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestBoolean2.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestDocBoost.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestExplanations.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFieldCache.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFilteredSearch.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery2.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestSort.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestSpanQueryFilter.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestSubScorerFreqs.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestTermScorer.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestTermVectors.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/cache/TestEntryCreators.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/function/FunctionTestSetup.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/TestBasics.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/store/TestMultiMMap.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/util/LuceneTestCase.java
Modified: lucene/dev/trunk/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java?rev=1065095&r1=1065094&r2=1065095&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java (original)
+++ lucene/dev/trunk/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java Sat Jan 29 19:48:56 2011
@@ -65,7 +65,7 @@ public class TestIndicesEquals extends L
// create dir data
IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()));
+ TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
for (int i = 0; i < 20; i++) {
Document document = new Document();
@@ -91,7 +91,7 @@ public class TestIndicesEquals extends L
// create dir data
IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()));
+ TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
indexWriter.setInfoStream(VERBOSE ? System.out : null);
if (VERBOSE) {
System.out.println("TEST: make test index");
Modified: lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java?rev=1065095&r1=1065094&r2=1065095&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java (original)
+++ lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java Sat Jan 29 19:48:56 2011
@@ -54,7 +54,7 @@ public class TestFieldNormModifier exten
super.setUp();
store = newDirectory();
IndexWriter writer = new IndexWriter(store, newIndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()));
+ TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
for (int i = 0; i < NUM_DOCS; i++) {
Document d = new Document();
Modified: lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java?rev=1065095&r1=1065094&r2=1065095&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java (original)
+++ lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java Sat Jan 29 19:48:56 2011
@@ -32,7 +32,7 @@ public class TestMultiPassIndexSplitter
public void setUp() throws Exception {
super.setUp();
dir = newDirectory();
- IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()));
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
Document doc;
for (int i = 0; i < NUM_DOCS; i++) {
doc = new Document();
Modified: lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java?rev=1065095&r1=1065094&r2=1065095&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java (original)
+++ lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java Sat Jan 29 19:48:56 2011
@@ -59,7 +59,7 @@ public class TestLengthNormModifier exte
super.setUp();
store = newDirectory();
IndexWriter writer = new IndexWriter(store, newIndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()));
+ TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
for (int i = 0; i < NUM_DOCS; i++) {
Document d = new Document();
Modified: lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java?rev=1065095&r1=1065094&r2=1065095&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java (original)
+++ lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java Sat Jan 29 19:48:56 2011
@@ -20,16 +20,17 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.HashSet;
+import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
public class DuplicateFilterTest extends LuceneTestCase {
private static final String KEY_FIELD = "url";
@@ -42,7 +43,7 @@ public class DuplicateFilterTest extends
public void setUp() throws Exception {
super.setUp();
directory = newDirectory();
- RandomIndexWriter writer = new RandomIndexWriter(random, directory);
+ RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
//Add series of docs with filterable fields : url, text and dates flags
addDoc(writer, "http://lucene.apache.org", "lucene 1.4.3 available", "20040101");
Modified: lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java?rev=1065095&r1=1065094&r2=1065095&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java (original)
+++ lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java Sat Jan 29 19:48:56 2011
@@ -40,7 +40,7 @@ public class FuzzyLikeThisQueryTest exte
public void setUp() throws Exception {
super.setUp();
directory = newDirectory();
- RandomIndexWriter writer = new RandomIndexWriter(random, directory);
+ RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
//Add series of docs with misspelt names
addDoc(writer, "jonathon smythe","1");
Copied: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java (from r1065077, lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentDeletes.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java?p2=lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java&p1=lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentDeletes.java&r1=1065077&r2=1065095&rev=1065095&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentDeletes.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java Sat Jan 29 19:48:56 2011
@@ -37,7 +37,8 @@ import org.apache.lucene.util.RamUsageEs
// NOTE: we are sync'd by BufferedDeletes, ie, all access to
// instances of this class is via sync'd methods on
// BufferedDeletes
-class SegmentDeletes {
+
+class BufferedDeletes {
/* Rough logic: HashMap has an array[Entry] w/ varying
load factor (say 2 * POINTER). Entry is object w/ Term
@@ -78,14 +79,16 @@ class SegmentDeletes {
private final static boolean VERBOSE_DELETES = false;
+ long gen;
+
@Override
public String toString() {
if (VERBOSE_DELETES) {
- return "SegmentDeletes [numTerms=" + numTermDeletes + ", terms=" + terms
+ return "gen=" + gen + " numTerms=" + numTermDeletes + ", terms=" + terms
+ ", queries=" + queries + ", docIDs=" + docIDs + ", bytesUsed="
- + bytesUsed + "]";
+ + bytesUsed;
} else {
- String s = "";
+ String s = "gen=" + gen;
if (numTermDeletes.get() != 0) {
s += " " + numTermDeletes.get() + " deleted terms (unique count=" + terms.size() + ")";
}
@@ -102,8 +105,8 @@ class SegmentDeletes {
return s;
}
}
-
- void update(SegmentDeletes in, boolean noLimit) {
+
+ void update(BufferedDeletes in) {
numTermDeletes.addAndGet(in.numTermDeletes.get());
for (Map.Entry<Term,Integer> ent : in.terms.entrySet()) {
final Term term = ent.getKey();
@@ -111,13 +114,7 @@ class SegmentDeletes {
// only incr bytesUsed if this term wasn't already buffered:
bytesUsed.addAndGet(BYTES_PER_DEL_TERM);
}
- final Integer limit;
- if (noLimit) {
- limit = MAX_INT;
- } else {
- limit = ent.getValue();
- }
- terms.put(term, limit);
+ terms.put(term, MAX_INT);
}
for (Map.Entry<Query,Integer> ent : in.queries.entrySet()) {
@@ -126,13 +123,7 @@ class SegmentDeletes {
// only incr bytesUsed if this query wasn't already buffered:
bytesUsed.addAndGet(BYTES_PER_DEL_QUERY);
}
- final Integer limit;
- if (noLimit) {
- limit = MAX_INT;
- } else {
- limit = ent.getValue();
- }
- queries.put(query, limit);
+ queries.put(query, MAX_INT);
}
// docIDs never move across segments and the docIDs
@@ -160,8 +151,8 @@ class SegmentDeletes {
// threads are replacing the same doc at nearly the
// same time, it's possible that one thread that got a
// higher docID is scheduled before the other
- // threads. If we blindly replace than we can get
- // double-doc in the segment.
+ // threads. If we blindly replace then we can
+ // incorrectly get both docs indexed.
return;
}
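
With the noLimit flag removed from update() above, a packet coalesced into another always applies to every doc in the older segments, so the per-term/per-query docID-upto limit is simply clamped to MAX_INT. A minimal sketch of the resulting packet shape, assuming hypothetical names and tracking terms only (no queries, docIDs, or RAM accounting):

  import java.util.HashMap;
  import java.util.Map;

  // Hypothetical, simplified stand-in for BufferedDeletes (one packet):
  final class DeletePacketSketch {
    long gen;  // stamped by the stream when the packet is pushed

    // term -> docID-upto limit; a delete only applies to docs at or
    // below the limit, which confines it to docs added before it
    final Map<String,Integer> terms = new HashMap<>();

    void addTerm(String term, int docIDUpto) {
      terms.put(term, docIDUpto);
    }

    // Coalescing: a packet folded into older segments' pending deletes
    // applies to every doc there, so the limit is always MAX_VALUE now:
    void update(DeletePacketSketch in) {
      for (Map.Entry<String,Integer> ent : in.terms.entrySet()) {
        terms.put(ent.getKey(), Integer.MAX_VALUE);
      }
    }
  }
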
Copied: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java (from r1065077, lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java?p2=lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java&p1=lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java&r1=1065077&r2=1065095&rev=1065095&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java Sat Jan 29 19:48:56 2011
@@ -19,10 +19,12 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.io.PrintStream;
-import java.util.HashMap;
+import java.util.List;
+import java.util.ArrayList;
import java.util.Date;
import java.util.Map.Entry;
-import java.util.Map;
+import java.util.Comparator;
+import java.util.Collections;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
@@ -32,13 +34,30 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
-/** Holds a {@link SegmentDeletes} for each segment in the
- * index. */
-
-class BufferedDeletes {
-
- // Deletes for all flushed/merged segments:
- private final Map<SegmentInfo,SegmentDeletes> deletesMap = new HashMap<SegmentInfo,SegmentDeletes>();
+/* Tracks the stream of {@link BufferedDeletes}.
+ * When DocumentsWriter flushes, its buffered
+ * deletes are appended to this stream. We later
+ * apply these deletes (resolve them to the actual
+ * docIDs, per segment) when a merge is started
+ * (only to the to-be-merged segments). We
+ * also apply to all segments when NRT reader is pulled,
+ * commit/close is called, or when too many deletes are
+ * buffered and must be flushed (by RAM usage or by count).
+ *
+ * Each packet is assigned a generation, and each flushed or
+ * merged segment is also assigned a generation, so we can
+ * track which BufferedDeletes packets to apply to any given
+ * segment. */
+
+class BufferedDeletesStream {
+
+ // TODO: maybe linked list?
+ private final List<BufferedDeletes> deletes = new ArrayList<BufferedDeletes>();
+
+ // Starts at 1 so that SegmentInfos that have never had
+ // deletes applied (whose bufferedDelGen defaults to 0)
+ // will be correct:
+ private long nextGen = 1;
// used only by assert
private Term lastDeleteTerm;
@@ -48,13 +67,13 @@ class BufferedDeletes {
private final AtomicInteger numTerms = new AtomicInteger();
private final int messageID;
- public BufferedDeletes(int messageID) {
+ public BufferedDeletesStream(int messageID) {
this.messageID = messageID;
}
private synchronized void message(String message) {
if (infoStream != null) {
- infoStream.println("BD " + messageID + " [" + new Date() + "; " + Thread.currentThread().getName() + "]: BD " + message);
+ infoStream.println("BD " + messageID + " [" + new Date() + "; " + Thread.currentThread().getName() + "]: " + message);
}
}
@@ -62,40 +81,29 @@ class BufferedDeletes {
this.infoStream = infoStream;
}
- public synchronized void pushDeletes(SegmentDeletes newDeletes, SegmentInfo info) {
- pushDeletes(newDeletes, info, false);
- }
-
- // Moves all pending deletes onto the provided segment,
- // then clears the pending deletes
- public synchronized void pushDeletes(SegmentDeletes newDeletes, SegmentInfo info, boolean noLimit) {
- assert newDeletes.any();
- numTerms.addAndGet(newDeletes.numTermDeletes.get());
-
- if (!noLimit) {
- assert !deletesMap.containsKey(info);
- assert info != null;
- deletesMap.put(info, newDeletes);
- bytesUsed.addAndGet(newDeletes.bytesUsed.get());
- } else {
- final SegmentDeletes deletes = getDeletes(info);
- bytesUsed.addAndGet(-deletes.bytesUsed.get());
- deletes.update(newDeletes, noLimit);
- bytesUsed.addAndGet(deletes.bytesUsed.get());
- }
+ // Appends a new packet of buffered deletes to the stream,
+ // setting its generation:
+ public synchronized void push(BufferedDeletes packet) {
+ assert packet.any();
+ assert checkDeleteStats();
+ packet.gen = nextGen++;
+ deletes.add(packet);
+ numTerms.addAndGet(packet.numTermDeletes.get());
+ bytesUsed.addAndGet(packet.bytesUsed.get());
if (infoStream != null) {
- message("push deletes seg=" + info + " dels=" + getDeletes(info));
+ message("push deletes " + packet + " delGen=" + packet.gen + " packetCount=" + deletes.size());
}
assert checkDeleteStats();
}
-
+
public synchronized void clear() {
- deletesMap.clear();
+ deletes.clear();
+ nextGen = 1;
numTerms.set(0);
bytesUsed.set(0);
}
- synchronized boolean any() {
+ public boolean any() {
return bytesUsed.get() != 0;
}
@@ -107,194 +115,203 @@ class BufferedDeletes {
return bytesUsed.get();
}
- // IW calls this on finishing a merge. While the merge
- // was running, it's possible new deletes were pushed onto
- // our last (and only our last) segment. In this case we
- // must carry forward those deletes onto the merged
- // segment.
- synchronized void commitMerge(MergePolicy.OneMerge merge) {
- assert checkDeleteStats();
- if (infoStream != null) {
- message("commitMerge merge.info=" + merge.info + " merge.segments=" + merge.segments);
- }
- final SegmentInfo lastInfo = merge.segments.lastElement();
- final SegmentDeletes lastDeletes = deletesMap.get(lastInfo);
- if (lastDeletes != null) {
- deletesMap.remove(lastInfo);
- assert !deletesMap.containsKey(merge.info);
- deletesMap.put(merge.info, lastDeletes);
- // don't need to update numTerms/bytesUsed since we
- // are just moving the deletes from one info to
- // another
- if (infoStream != null) {
- message("commitMerge done: new deletions=" + lastDeletes);
- }
- } else if (infoStream != null) {
- message("commitMerge done: no new deletions");
+ public static class ApplyDeletesResult {
+ // True if any actual deletes took place:
+ public final boolean anyDeletes;
+
+ // Current gen, for the merged segment:
+ public final long gen;
+
+ ApplyDeletesResult(boolean anyDeletes, long gen) {
+ this.anyDeletes = anyDeletes;
+ this.gen = gen;
}
- assert !anyDeletes(merge.segments.range(0, merge.segments.size()-1));
- assert checkDeleteStats();
}
- synchronized void clear(SegmentDeletes deletes) {
- deletes.clear();
- }
-
- public synchronized boolean applyDeletes(IndexWriter.ReaderPool readerPool, SegmentInfos segmentInfos, SegmentInfos applyInfos) throws IOException {
- if (!any()) {
- return false;
+ // Sorts SegmentInfos from smallest to biggest bufferedDelGen:
+ private static final Comparator<SegmentInfo> sortByDelGen = new Comparator<SegmentInfo>() {
+ @Override
+ public int compare(SegmentInfo si1, SegmentInfo si2) {
+ final long cmp = si1.getBufferedDeletesGen() - si2.getBufferedDeletesGen();
+ if (cmp > 0) {
+ return 1;
+ } else if (cmp < 0) {
+ return -1;
+ } else {
+ return 0;
+ }
}
+
+ @Override
+ public boolean equals(Object other) {
+ return sortByDelGen == other;
+ }
+ };
+
+ /** Resolves the buffered deleted Term/Query/docIDs into
+ * actual deleted docIDs in the deletedDocs BitVector for
+ * each SegmentReader. */
+ public synchronized ApplyDeletesResult applyDeletes(IndexWriter.ReaderPool readerPool, SegmentInfos infos) throws IOException {
final long t0 = System.currentTimeMillis();
- if (infoStream != null) {
- message("applyDeletes: applyInfos=" + applyInfos + "; index=" + segmentInfos);
+ if (infos.size() == 0) {
+ return new ApplyDeletesResult(false, nextGen++);
}
assert checkDeleteStats();
- assert applyInfos.size() > 0;
-
- boolean any = false;
-
- final SegmentInfo lastApplyInfo = applyInfos.lastElement();
- final int lastIdx = segmentInfos.indexOf(lastApplyInfo);
-
- final SegmentInfo firstInfo = applyInfos.firstElement();
- final int firstIdx = segmentInfos.indexOf(firstInfo);
+ if (!any()) {
+ message("applyDeletes: no deletes; skipping");
+ return new ApplyDeletesResult(false, nextGen++);
+ }
- // applyInfos must be a slice of segmentInfos
- assert lastIdx - firstIdx + 1 == applyInfos.size();
-
- // iterate over all segment infos backwards
- // coalesceing deletes along the way
- // when we're at or below the last of the
- // segments to apply to, start applying the deletes
- // we traverse up to the first apply infos
- SegmentDeletes coalescedDeletes = null;
- boolean hasDeletes = false;
- for (int segIdx=segmentInfos.size()-1; segIdx >= firstIdx; segIdx--) {
- final SegmentInfo info = segmentInfos.info(segIdx);
- final SegmentDeletes deletes = deletesMap.get(info);
- assert deletes == null || deletes.any();
+ if (infoStream != null) {
+ message("applyDeletes: infos=" + infos + " packetCount=" + deletes.size());
+ }
- if (deletes == null && coalescedDeletes == null) {
- continue;
- }
+ SegmentInfos infos2 = new SegmentInfos();
+ infos2.addAll(infos);
+ Collections.sort(infos2, sortByDelGen);
- if (infoStream != null) {
- message("applyDeletes: seg=" + info + " segment's deletes=[" + (deletes == null ? "null" : deletes) + "]; coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "]");
- }
+ BufferedDeletes coalescedDeletes = null;
+ boolean anyNewDeletes = false;
- hasDeletes |= deletes != null;
+ int infosIDX = infos2.size()-1;
+ int delIDX = deletes.size()-1;
- if (segIdx <= lastIdx && hasDeletes) {
+ while (infosIDX >= 0) {
+ //System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX);
- final long delCountInc = applyDeletes(readerPool, info, coalescedDeletes, deletes);
+ final BufferedDeletes packet = delIDX >= 0 ? deletes.get(delIDX) : null;
+ final SegmentInfo info = infos2.get(infosIDX);
+ final long segGen = info.getBufferedDeletesGen();
- if (delCountInc != 0) {
- any = true;
+ if (packet != null && segGen < packet.gen) {
+ //System.out.println(" coalesce");
+ if (coalescedDeletes == null) {
+ coalescedDeletes = new BufferedDeletes();
}
- if (infoStream != null) {
- message("deletes touched " + delCountInc + " docIDs");
+ coalescedDeletes.update(packet);
+ delIDX--;
+ } else if (packet != null && segGen == packet.gen) {
+ //System.out.println(" eq");
+
+ // Lock order: IW -> BD -> RP
+ assert readerPool.infoIsLive(info);
+ SegmentReader reader = readerPool.get(info, false);
+ int delCount = 0;
+ try {
+ if (coalescedDeletes != null) {
+ delCount += applyDeletes(coalescedDeletes, reader);
+ }
+ delCount += applyDeletes(packet, reader);
+ } finally {
+ readerPool.release(reader);
}
-
- if (deletes != null) {
- // we've applied doc ids, and they're only applied
- // on the current segment
- bytesUsed.addAndGet(-deletes.docIDs.size() * SegmentDeletes.BYTES_PER_DEL_DOCID);
- deletes.clearDocIDs();
+ anyNewDeletes |= delCount > 0;
+
+ // We've applied doc ids, and they're only applied
+ // on the current segment
+ bytesUsed.addAndGet(-packet.docIDs.size() * BufferedDeletes.BYTES_PER_DEL_DOCID);
+ packet.clearDocIDs();
+
+ if (infoStream != null) {
+ message("seg=" + info + " segGen=" + segGen + " segDeletes=[" + packet + "]; coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "] delCount=" + delCount);
}
- }
-
- // now coalesce at the max limit
- if (deletes != null) {
+
if (coalescedDeletes == null) {
- coalescedDeletes = new SegmentDeletes();
+ coalescedDeletes = new BufferedDeletes();
}
- // TODO: we could make this single pass (coalesce as
- // we apply the deletes
- coalescedDeletes.update(deletes, true);
- }
- }
-
- // move all deletes to segment just before our merge.
- if (firstIdx > 0) {
-
- SegmentDeletes mergedDeletes = null;
- // TODO: we could also make this single pass
- for (SegmentInfo info : applyInfos) {
- final SegmentDeletes deletes = deletesMap.get(info);
- if (deletes != null) {
- assert deletes.any();
- if (mergedDeletes == null) {
- mergedDeletes = getDeletes(segmentInfos.info(firstIdx-1));
- numTerms.addAndGet(-mergedDeletes.numTermDeletes.get());
- assert numTerms.get() >= 0;
- bytesUsed.addAndGet(-mergedDeletes.bytesUsed.get());
- assert bytesUsed.get() >= 0;
+ coalescedDeletes.update(packet);
+ delIDX--;
+ infosIDX--;
+ info.setBufferedDeletesGen(nextGen);
+
+ } else {
+ //System.out.println(" gt");
+
+ if (coalescedDeletes != null) {
+ // Lock order: IW -> BD -> RP
+ assert readerPool.infoIsLive(info);
+ SegmentReader reader = readerPool.get(info, false);
+ int delCount = 0;
+ try {
+ delCount += applyDeletes(coalescedDeletes, reader);
+ } finally {
+ readerPool.release(reader);
}
+ anyNewDeletes |= delCount > 0;
- mergedDeletes.update(deletes, true);
+ if (infoStream != null) {
+ message("seg=" + info + " segGen=" + segGen + " coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "] delCount=" + delCount);
+ }
}
- }
+ info.setBufferedDeletesGen(nextGen);
- if (mergedDeletes != null) {
- numTerms.addAndGet(mergedDeletes.numTermDeletes.get());
- bytesUsed.addAndGet(mergedDeletes.bytesUsed.get());
+ infosIDX--;
}
-
- if (infoStream != null) {
- if (mergedDeletes != null) {
- message("applyDeletes: merge all deletes into seg=" + segmentInfos.info(firstIdx-1) + ": " + mergedDeletes);
- } else {
- message("applyDeletes: no deletes to merge");
- }
- }
- } else {
- // We drop the deletes in this case, because we've
- // applied them to segment infos starting w/ the first
- // segment. There are no prior segments so there's no
- // reason to keep them around. When the applyInfos ==
- // segmentInfos this means all deletes have been
- // removed:
}
- remove(applyInfos);
assert checkDeleteStats();
- assert applyInfos != segmentInfos || !any();
-
if (infoStream != null) {
message("applyDeletes took " + (System.currentTimeMillis()-t0) + " msec");
}
- return any;
- }
-
- private synchronized long applyDeletes(IndexWriter.ReaderPool readerPool,
- SegmentInfo info,
- SegmentDeletes coalescedDeletes,
- SegmentDeletes segmentDeletes) throws IOException {
- assert readerPool.infoIsLive(info);
-
- assert coalescedDeletes == null || coalescedDeletes.docIDs.size() == 0;
+ // assert infos != segmentInfos || !any() : "infos=" + infos + " segmentInfos=" + segmentInfos + " any=" + any;
- long delCount = 0;
+ return new ApplyDeletesResult(anyNewDeletes, nextGen++);
+ }
+
+ public synchronized long getNextGen() {
+ return nextGen++;
+ }
- // Lock order: IW -> BD -> RP
- SegmentReader reader = readerPool.get(info, false);
- try {
- if (coalescedDeletes != null) {
- delCount += applyDeletes(coalescedDeletes, reader);
+ // Lock order IW -> BD
+ /* Removes any BufferedDeletes that we no longer need to
+ * store because all segments in the index have had the
+ * deletes applied. */
+ public synchronized void prune(SegmentInfos segmentInfos) {
+ assert checkDeleteStats();
+ long minGen = Long.MAX_VALUE;
+ for(SegmentInfo info : segmentInfos) {
+ minGen = Math.min(info.getBufferedDeletesGen(), minGen);
+ }
+
+ if (infoStream != null) {
+ message("prune sis=" + segmentInfos + " minGen=" + minGen + " packetCount=" + deletes.size());
+ }
+
+ final int limit = deletes.size();
+ for(int delIDX=0;delIDX<limit;delIDX++) {
+ if (deletes.get(delIDX).gen >= minGen) {
+ prune(delIDX);
+ assert checkDeleteStats();
+ return;
}
- if (segmentDeletes != null) {
- delCount += applyDeletes(segmentDeletes, reader);
+ }
+
+ // All deletes pruned
+ prune(limit);
+ assert !any();
+ assert checkDeleteStats();
+ }
+
+ private synchronized void prune(int count) {
+ if (count > 0) {
+ if (infoStream != null) {
+ message("pruneDeletes: prune " + count + " packets; " + (deletes.size() - count) + " packets remain");
+ }
+ for(int delIDX=0;delIDX<count;delIDX++) {
+ final BufferedDeletes packet = deletes.get(delIDX);
+ numTerms.addAndGet(-packet.numTermDeletes.get());
+ assert numTerms.get() >= 0;
+ bytesUsed.addAndGet(-packet.bytesUsed.get());
+ assert bytesUsed.get() >= 0;
}
- } finally {
- readerPool.release(reader);
+ deletes.subList(0, count).clear();
}
- return delCount;
}
-
- private synchronized long applyDeletes(SegmentDeletes deletes, SegmentReader reader) throws IOException {
+
+ private synchronized long applyDeletes(BufferedDeletes deletes, SegmentReader reader) throws IOException {
long delCount = 0;
@@ -399,39 +416,6 @@ class BufferedDeletes {
return delCount;
}
- public synchronized SegmentDeletes getDeletes(SegmentInfo info) {
- SegmentDeletes deletes = deletesMap.get(info);
- if (deletes == null) {
- deletes = new SegmentDeletes();
- deletesMap.put(info, deletes);
- }
- return deletes;
- }
-
- public synchronized void remove(SegmentInfos infos) {
- assert infos.size() > 0;
- for (SegmentInfo info : infos) {
- SegmentDeletes deletes = deletesMap.get(info);
- if (deletes != null) {
- bytesUsed.addAndGet(-deletes.bytesUsed.get());
- assert bytesUsed.get() >= 0: "bytesUsed=" + bytesUsed;
- numTerms.addAndGet(-deletes.numTermDeletes.get());
- assert numTerms.get() >= 0: "numTerms=" + numTerms;
- deletesMap.remove(info);
- }
- }
- }
-
- // used only by assert
- private boolean anyDeletes(SegmentInfos infos) {
- for(SegmentInfo info : infos) {
- if (deletesMap.containsKey(info)) {
- return true;
- }
- }
- return false;
- }
-
// used only by assert
private boolean checkDeleteTerm(Term term) {
if (term != null) {
@@ -445,9 +429,9 @@ class BufferedDeletes {
private boolean checkDeleteStats() {
int numTerms2 = 0;
long bytesUsed2 = 0;
- for(SegmentDeletes deletes : deletesMap.values()) {
- numTerms2 += deletes.numTermDeletes.get();
- bytesUsed2 += deletes.bytesUsed.get();
+ for(BufferedDeletes packet : deletes) {
+ numTerms2 += packet.numTermDeletes.get();
+ bytesUsed2 += packet.bytesUsed.get();
}
assert numTerms2 == numTerms.get(): "numTerms2=" + numTerms2 + " vs " + numTerms.get();
assert bytesUsed2 == bytesUsed.get(): "bytesUsed2=" + bytesUsed2 + " vs " + bytesUsed;
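
The class above replaces the old per-SegmentInfo map with a flat stream of generation-stamped packets. A compact sketch of just the gen bookkeeping, under the assumption that packets are reduced to their gen and segments to their bufferedDelGen (all names hypothetical):

  import java.util.ArrayList;
  import java.util.List;

  // Hypothetical, simplified stand-in for BufferedDeletesStream:
  final class DeleteStreamSketch {
    static final class Packet { long gen; }

    private final List<Packet> packets = new ArrayList<>();

    // Starts at 1 so segments that never had deletes applied
    // (bufferedDelGen defaults to 0) sort before every packet:
    private long nextGen = 1;

    synchronized void push(Packet packet) {
      packet.gen = nextGen++;   // stamped on append, monotonically increasing
      packets.add(packet);
    }

    synchronized long getNextGen() {
      return nextGen++;
    }

    // A segment with bufferedDelGen G has already had every packet with
    // gen < G applied, so once all segments are past a packet it can go:
    synchronized void prune(long[] segmentDelGens) {
      long minGen = Long.MAX_VALUE;
      for (long gen : segmentDelGens) {
        minGen = Math.min(minGen, gen);
      }
      int upto = 0;
      while (upto < packets.size() && packets.get(upto).gen < minGen) {
        upto++;
      }
      packets.subList(0, upto).clear();
    }
  }

applyDeletes then walks the segments sorted by bufferedDelGen alongside the packets, newest first: packets newer than a segment's gen are coalesced and applied to it, the segment's own packet (equal gen) is applied with its docID-upto limits, and every visited segment is re-stamped with a fresh gen so a later prune() can discard fully-applied packets.
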
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1065095&r1=1065094&r2=1065095&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Sat Jan 29 19:48:56 2011
@@ -134,7 +134,7 @@ final class DocumentsWriter {
private final int maxThreadStates;
// Deletes for our still-in-RAM (to be flushed next) segment
- private SegmentDeletes pendingDeletes = new SegmentDeletes();
+ private BufferedDeletes pendingDeletes = new BufferedDeletes();
static class DocState {
DocumentsWriter docWriter;
@@ -278,16 +278,16 @@ final class DocumentsWriter {
private boolean closed;
private final FieldInfos fieldInfos;
- private final BufferedDeletes bufferedDeletes;
+ private final BufferedDeletesStream bufferedDeletesStream;
private final IndexWriter.FlushControl flushControl;
- DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain, int maxThreadStates, FieldInfos fieldInfos, BufferedDeletes bufferedDeletes) throws IOException {
+ DocumentsWriter(Directory directory, IndexWriter writer, IndexingChain indexingChain, int maxThreadStates, FieldInfos fieldInfos, BufferedDeletesStream bufferedDeletesStream) throws IOException {
this.directory = directory;
this.writer = writer;
this.similarityProvider = writer.getConfig().getSimilarityProvider();
this.maxThreadStates = maxThreadStates;
this.fieldInfos = fieldInfos;
- this.bufferedDeletes = bufferedDeletes;
+ this.bufferedDeletesStream = bufferedDeletesStream;
flushControl = writer.flushControl;
consumer = indexingChain.getChain(this);
@@ -501,23 +501,24 @@ final class DocumentsWriter {
}
// for testing
- public SegmentDeletes getPendingDeletes() {
+ public BufferedDeletes getPendingDeletes() {
return pendingDeletes;
}
private void pushDeletes(SegmentInfo newSegment, SegmentInfos segmentInfos) {
// Lock order: DW -> BD
if (pendingDeletes.any()) {
- if (newSegment != null) {
+ if (segmentInfos.size() > 0 || newSegment != null) {
if (infoStream != null) {
- message("flush: push buffered deletes to newSegment");
+ message("flush: push buffered deletes");
}
- bufferedDeletes.pushDeletes(pendingDeletes, newSegment);
- } else if (segmentInfos.size() > 0) {
+ bufferedDeletesStream.push(pendingDeletes);
if (infoStream != null) {
- message("flush: push buffered deletes to previously flushed segment " + segmentInfos.lastElement());
+ message("flush: delGen=" + pendingDeletes.gen);
+ }
+ if (newSegment != null) {
+ newSegment.setBufferedDeletesGen(pendingDeletes.gen);
}
- bufferedDeletes.pushDeletes(pendingDeletes, segmentInfos.lastElement(), true);
} else {
if (infoStream != null) {
message("flush: drop buffered deletes: no segments");
@@ -526,7 +527,9 @@ final class DocumentsWriter {
// there are no segments, the deletions cannot
// affect anything.
}
- pendingDeletes = new SegmentDeletes();
+ pendingDeletes = new BufferedDeletes();
+ } else if (newSegment != null) {
+ newSegment.setBufferedDeletesGen(bufferedDeletesStream.getNextGen());
}
}
@@ -639,7 +642,6 @@ final class DocumentsWriter {
// Lock order: IW -> DW -> BD
pushDeletes(newSegment, segmentInfos);
-
if (infoStream != null) {
message("flush time " + (System.currentTimeMillis()-startTime) + " msec");
}
@@ -964,7 +966,7 @@ final class DocumentsWriter {
final boolean doBalance;
final long deletesRAMUsed;
- deletesRAMUsed = bufferedDeletes.bytesUsed();
+ deletesRAMUsed = bufferedDeletesStream.bytesUsed();
synchronized(this) {
if (ramBufferSize == IndexWriterConfig.DISABLE_AUTO_FLUSH || bufferIsFull) {
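
The rewritten pushDeletes above routes the pending packet one of three ways: push it to the stream (stamping a new segment with the packet's gen), drop it outright when there are no segments for it to affect, or, with nothing pending, merely stamp the new segment. A self-contained sketch of that decision, with hypothetical names:

  import java.util.ArrayList;
  import java.util.List;

  // Hypothetical, simplified stand-in for DocumentsWriter.pushDeletes:
  final class PushDeletesSketch {
    static final class Packet { long gen; boolean any; }
    static final class Segment { long delGen; }

    private final List<Packet> stream = new ArrayList<>();
    private long nextGen = 1;
    private Packet pending = new Packet();

    void pushDeletes(Segment newSegment, List<Segment> index) {
      if (pending.any) {
        if (!index.isEmpty() || newSegment != null) {
          pending.gen = nextGen++;
          stream.add(pending);            // append the packet to the stream
          if (newSegment != null) {
            // older packets predate every doc in the new segment, so it
            // starts at its own packet's gen:
            newSegment.delGen = pending.gen;
          }
        }
        // else: no segments at all, so the deletes cannot affect
        // anything and are dropped
        pending = new Packet();
      } else if (newSegment != null) {
        newSegment.delGen = nextGen++;    // nothing pending; just stamp
      }
    }
  }
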
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1065095&r1=1065094&r2=1065095&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java Sat Jan 29 19:48:56 2011
@@ -251,7 +251,7 @@ public class IndexWriter implements Clos
private final AtomicInteger flushDeletesCount = new AtomicInteger();
final ReaderPool readerPool = new ReaderPool();
- final BufferedDeletes bufferedDeletes;
+ final BufferedDeletesStream bufferedDeletesStream;
// This is a "write once" variable (like the organic dye
// on a DVD-R that may or may not be heated by a laser and
@@ -707,8 +707,8 @@ public class IndexWriter implements Clos
mergedSegmentWarmer = conf.getMergedSegmentWarmer();
codecs = conf.getCodecProvider();
- bufferedDeletes = new BufferedDeletes(messageID);
- bufferedDeletes.setInfoStream(infoStream);
+ bufferedDeletesStream = new BufferedDeletesStream(messageID);
+ bufferedDeletesStream.setInfoStream(infoStream);
poolReaders = conf.getReaderPooling();
OpenMode mode = conf.getOpenMode();
@@ -773,7 +773,7 @@ public class IndexWriter implements Clos
setRollbackSegmentInfos(segmentInfos);
- docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain(), conf.getMaxThreadStates(), getCurrentFieldInfos(), bufferedDeletes);
+ docWriter = new DocumentsWriter(directory, this, conf.getIndexingChain(), conf.getMaxThreadStates(), getCurrentFieldInfos(), bufferedDeletesStream);
docWriter.setInfoStream(infoStream);
// Default deleter (for backwards compatibility) is
@@ -921,7 +921,7 @@ public class IndexWriter implements Clos
this.infoStream = infoStream;
docWriter.setInfoStream(infoStream);
deleter.setInfoStream(infoStream);
- bufferedDeletes.setInfoStream(infoStream);
+ bufferedDeletesStream.setInfoStream(infoStream);
if (infoStream != null)
messageState();
}
@@ -1167,7 +1167,7 @@ public class IndexWriter implements Clos
public synchronized boolean hasDeletions() throws IOException {
ensureOpen();
- if (bufferedDeletes.any()) {
+ if (bufferedDeletesStream.any()) {
return true;
}
if (docWriter.anyDeletions()) {
@@ -1882,7 +1882,7 @@ public class IndexWriter implements Clos
mergePolicy.close();
mergeScheduler.close();
- bufferedDeletes.clear();
+ bufferedDeletesStream.clear();
synchronized(this) {
@@ -2525,10 +2525,10 @@ public class IndexWriter implements Clos
// tiny segments:
if (flushControl.getFlushDeletes() ||
(config.getRAMBufferSizeMB() != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
- bufferedDeletes.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) {
+ bufferedDeletesStream.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) {
applyAllDeletes = true;
if (infoStream != null) {
- message("force apply deletes bytesUsed=" + bufferedDeletes.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB()));
+ message("force apply deletes bytesUsed=" + bufferedDeletesStream.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB()));
}
}
}
@@ -2538,12 +2538,15 @@ public class IndexWriter implements Clos
message("apply all deletes during flush");
}
flushDeletesCount.incrementAndGet();
- if (bufferedDeletes.applyDeletes(readerPool, segmentInfos, segmentInfos)) {
+ final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, segmentInfos);
+ if (result.anyDeletes) {
checkpoint();
}
+ bufferedDeletesStream.prune(segmentInfos);
+ assert !bufferedDeletesStream.any();
flushControl.clearDeletes();
} else if (infoStream != null) {
- message("don't apply deletes now delTermCount=" + bufferedDeletes.numTerms() + " bytesUsed=" + bufferedDeletes.bytesUsed());
+ message("don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed());
}
doAfterFlush();
@@ -2569,7 +2572,7 @@ public class IndexWriter implements Clos
*/
public final long ramSizeInBytes() {
ensureOpen();
- return docWriter.bytesUsed() + bufferedDeletes.bytesUsed();
+ return docWriter.bytesUsed() + bufferedDeletesStream.bytesUsed();
}
/** Expert: Return the number of documents currently
@@ -2579,28 +2582,12 @@ public class IndexWriter implements Clos
return docWriter.getNumDocs();
}
- private int ensureContiguousMerge(MergePolicy.OneMerge merge) {
-
- int first = segmentInfos.indexOf(merge.segments.info(0));
- if (first == -1)
- throw new MergePolicy.MergeException("could not find segment " + merge.segments.info(0).name + " in current index " + segString(), directory);
-
- final int numSegments = segmentInfos.size();
-
- final int numSegmentsToMerge = merge.segments.size();
- for(int i=0;i<numSegmentsToMerge;i++) {
- final SegmentInfo info = merge.segments.info(i);
-
- if (first + i >= numSegments || !segmentInfos.info(first+i).equals(info)) {
- if (segmentInfos.indexOf(info) == -1)
- throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
- else
- throw new MergePolicy.MergeException("MergePolicy selected non-contiguous segments to merge (" + merge.segString(directory) + " vs " + segString() + "), which IndexWriter (currently) cannot handle",
- directory);
+ private void ensureValidMerge(MergePolicy.OneMerge merge) {
+ for(SegmentInfo info : merge.segments) {
+ if (segmentInfos.indexOf(info) == -1) {
+ throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
}
}
-
- return first;
}
/** Carefully merges deletes for the segments we just
@@ -2625,9 +2612,11 @@ public class IndexWriter implements Clos
// started merging:
int docUpto = 0;
int delCount = 0;
+ long minGen = Long.MAX_VALUE;
for(int i=0; i < sourceSegments.size(); i++) {
SegmentInfo info = sourceSegments.info(i);
+ minGen = Math.min(info.getBufferedDeletesGen(), minGen);
int docCount = info.docCount;
SegmentReader previousReader = merge.readersClone[i];
final Bits prevDelDocs = previousReader.getDeletedDocs();
@@ -2678,9 +2667,17 @@ public class IndexWriter implements Clos
assert mergedReader.numDeletedDocs() == delCount;
mergedReader.hasChanges = delCount > 0;
+
+ // If new deletes were applied while we were merging
+ // (which happens if eg commit() or getReader() is
+ // called during our merge), then it better be the case
+ // that the delGen has increased for all our merged
+ // segments:
+ assert !mergedReader.hasChanges || minGen > mergedReader.getSegmentInfo().getBufferedDeletesGen();
+
+ mergedReader.getSegmentInfo().setBufferedDeletesGen(minGen);
}
- /* FIXME if we want to support non-contiguous segment merges */
synchronized private boolean commitMerge(MergePolicy.OneMerge merge, SegmentReader mergedReader) throws IOException {
assert testPoint("startCommitMerge");
@@ -2706,7 +2703,7 @@ public class IndexWriter implements Clos
return false;
}
- final int start = ensureContiguousMerge(merge);
+ ensureValidMerge(merge);
commitMergedDeletes(merge, mergedReader);
@@ -2716,10 +2713,32 @@ public class IndexWriter implements Clos
// format as well:
setMergeDocStoreIsCompoundFile(merge);
- segmentInfos.subList(start, start + merge.segments.size()).clear();
assert !segmentInfos.contains(merge.info);
- segmentInfos.add(start, merge.info);
-
+
+ final Set mergedAway = new HashSet<SegmentInfo>(merge.segments);
+ int segIdx = 0;
+ int newSegIdx = 0;
+ boolean inserted = false;
+ final int curSegCount = segmentInfos.size();
+ while(segIdx < curSegCount) {
+ final SegmentInfo info = segmentInfos.info(segIdx++);
+ if (mergedAway.contains(info)) {
+ if (!inserted) {
+ segmentInfos.set(segIdx-1, merge.info);
+ inserted = true;
+ newSegIdx++;
+ }
+ } else {
+ segmentInfos.set(newSegIdx++, info);
+ }
+ }
+ assert newSegIdx == curSegCount - merge.segments.size() + 1;
+ segmentInfos.subList(newSegIdx, segmentInfos.size()).clear();
+
+ if (infoStream != null) {
+ message("after commit: " + segString());
+ }
+
closeMergeReaders(merge, false);
// Must note the change to segmentInfos so any commits
@@ -2731,16 +2750,12 @@ public class IndexWriter implements Clos
// disk, updating SegmentInfo, etc.:
readerPool.clear(merge.segments);
- // remove pending deletes of the segments
- // that were merged, moving them onto the segment just
- // before the merged segment
- // Lock order: IW -> BD
- bufferedDeletes.commitMerge(merge);
-
if (merge.optimize) {
// cascade the optimize:
segmentsToOptimize.add(merge.info);
}
+
+
return true;
}
@@ -2868,7 +2883,7 @@ public class IndexWriter implements Clos
}
}
- ensureContiguousMerge(merge);
+ ensureValidMerge(merge);
pendingMerges.add(merge);
@@ -2918,6 +2933,9 @@ public class IndexWriter implements Clos
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot merge");
}
+ // TODO: is there any perf benefit to sorting
+ // merged segments? eg biggest to smallest?
+
if (merge.info != null)
// mergeInit already done
return;
@@ -2925,16 +2943,22 @@ public class IndexWriter implements Clos
if (merge.isAborted())
return;
- // Lock order: IW -> BD
- if (bufferedDeletes.applyDeletes(readerPool, segmentInfos, merge.segments)) {
- checkpoint();
- }
-
// Bind a new segment name here so even with
// ConcurrentMergePolicy we keep deterministic segment
// names.
merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, false, null, false);
+ // Lock order: IW -> BD
+ final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments);
+ if (result.anyDeletes) {
+ checkpoint();
+ }
+
+ merge.info.setBufferedDeletesGen(result.gen);
+
+ // Lock order: IW -> BD
+ bufferedDeletesStream.prune(segmentInfos);
+
Map<String,String> details = new HashMap<String,String>();
details.put("optimize", Boolean.toString(merge.optimize));
details.put("mergeFactor", Integer.toString(merge.segments.size()));
@@ -3498,7 +3522,7 @@ public class IndexWriter implements Clos
}
synchronized boolean nrtIsCurrent(SegmentInfos infos) {
- return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletes.any();
+ return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any();
}
synchronized boolean isClosed() {
@@ -3665,7 +3689,7 @@ public class IndexWriter implements Clos
final double ramBufferSizeMB = config.getRAMBufferSizeMB();
if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) {
final long limit = (long) (ramBufferSizeMB*1024*1024);
- long used = bufferedDeletes.bytesUsed() + docWriter.bytesUsed();
+ long used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
if (used >= limit) {
// DocumentsWriter may be able to free up some
@@ -3673,7 +3697,7 @@ public class IndexWriter implements Clos
// Lock order: FC -> DW
docWriter.balanceRAM();
- used = bufferedDeletes.bytesUsed() + docWriter.bytesUsed();
+ used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
if (used >= limit) {
return setFlushPending("ram full: " + reason, false);
}
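
Since merged segments may no longer be adjacent, commitMerge above cannot clear a contiguous subList anymore; it compacts segmentInfos in place, letting the merged segment take the slot of the first segment it swallowed while the survivors keep their relative order. A runnable sketch of that compaction, using plain strings in place of SegmentInfo:

  import java.util.ArrayList;
  import java.util.List;
  import java.util.Set;

  // Hypothetical, simplified stand-in for the commitMerge compaction loop:
  final class CommitMergeSketch {
    static List<String> commitMerge(List<String> index, Set<String> mergedAway, String merged) {
      List<String> out = new ArrayList<>(index);
      int newIdx = 0;
      boolean inserted = false;
      for (String seg : index) {
        if (mergedAway.contains(seg)) {
          if (!inserted) {
            out.set(newIdx++, merged);  // merged segment takes the slot of
            inserted = true;            // the first segment it swallowed
          }                             // later merged-away segments vanish
        } else {
          out.set(newIdx++, seg);       // survivors keep relative order
        }
      }
      return out.subList(0, newIdx);
    }

    public static void main(String[] args) {
      // _1 and _3 merge into _m; _0 and _2 keep their positions around it:
      System.out.println(commitMerge(
          List.of("_0", "_1", "_2", "_3"),
          Set.of("_1", "_3"), "_m"));   // prints [_0, _m, _2]
    }
  }
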
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1065095&r1=1065094&r2=1065095&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Sat Jan 29 19:48:56 2011
@@ -19,6 +19,8 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Set;
+import java.util.Arrays;
+import java.util.Comparator;
/** <p>This class implements a {@link MergePolicy} that tries
* to merge segments into levels of exponentially
@@ -67,6 +69,7 @@ public abstract class LogMergePolicy ext
// out there wrote his own LMP ...
protected long maxMergeSizeForOptimize = Long.MAX_VALUE;
protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
+ protected boolean requireContiguousMerge = false;
protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
@@ -105,6 +108,21 @@ public abstract class LogMergePolicy ext
writer.get().message("LMP: " + message);
}
+ /** If true, merges must be an in-order slice of the
+ * segments. If false, then the merge policy is free to
+ * pick any segments. The default is false, which is
+ * in general more efficient than true since it gives the
+ * merge policy more freedom to pick closely sized
+ * segments. */
+ public void setRequireContiguousMerge(boolean v) {
+ requireContiguousMerge = v;
+ }
+
+ /** See {@link #setRequireContiguousMerge}. */
+ public boolean getRequireContiguousMerge() {
+ return requireContiguousMerge;
+ }
+
/** <p>Returns the number of segments that are merged at
* once and also controls the total number of segments
* allowed to accumulate in the index.</p> */
@@ -356,6 +374,8 @@ public abstract class LogMergePolicy ext
}
return null;
}
+
+ // TODO: handle non-contiguous merge case differently?
// Find the newest (rightmost) segment that needs to
// be optimized (other segments may have been flushed
@@ -454,6 +474,37 @@ public abstract class LogMergePolicy ext
return spec;
}
+ private static class SegmentInfoAndLevel implements Comparable {
+ SegmentInfo info;
+ float level;
+ int index;
+
+ public SegmentInfoAndLevel(SegmentInfo info, float level, int index) {
+ this.info = info;
+ this.level = level;
+ this.index = index;
+ }
+
+ // Sorts largest to smallest
+ public int compareTo(Object o) {
+ SegmentInfoAndLevel other = (SegmentInfoAndLevel) o;
+ if (level < other.level)
+ return 1;
+ else if (level > other.level)
+ return -1;
+ else
+ return 0;
+ }
+ }
+
+ private static class SortByIndex implements Comparator<SegmentInfoAndLevel> {
+ public int compare(SegmentInfoAndLevel o1, SegmentInfoAndLevel o2) {
+ return o1.index - o2.index;
+ }
+ }
+
+ private static final SortByIndex sortByIndex = new SortByIndex();
+
/** Checks if any merges are now necessary and returns a
* {@link MergePolicy.MergeSpecification} if so. A merge
* is necessary when there are more than {@link
@@ -470,7 +521,7 @@ public abstract class LogMergePolicy ext
// Compute levels, which is just log (base mergeFactor)
// of the size of each segment
- float[] levels = new float[numSegments];
+ SegmentInfoAndLevel[] levels = new SegmentInfoAndLevel[numSegments];
final float norm = (float) Math.log(mergeFactor);
for(int i=0;i<numSegments;i++) {
@@ -480,8 +531,12 @@ public abstract class LogMergePolicy ext
// Floor tiny segments
if (size < 1)
size = 1;
- levels[i] = (float) Math.log(size)/norm;
- message("seg " + info.name + " level=" + levels[i]);
+ levels[i] = new SegmentInfoAndLevel(info, (float) Math.log(size)/norm, i);
+ message("seg " + info.name + " level=" + levels[i].level + " size=" + size);
+ }
+
+ if (!requireContiguousMerge) {
+ Arrays.sort(levels);
}
final float levelFloor;
@@ -504,9 +559,9 @@ public abstract class LogMergePolicy ext
// Find max level of all segments not already
// quantized.
- float maxLevel = levels[start];
+ float maxLevel = levels[start].level;
for(int i=1+start;i<numSegments;i++) {
- final float level = levels[i];
+ final float level = levels[i].level;
if (level > maxLevel)
maxLevel = level;
}
@@ -527,7 +582,7 @@ public abstract class LogMergePolicy ext
int upto = numSegments-1;
while(upto >= start) {
- if (levels[upto] >= levelBottom) {
+ if (levels[upto].level >= levelBottom) {
break;
}
upto--;
@@ -540,18 +595,26 @@ public abstract class LogMergePolicy ext
while(end <= 1+upto) {
boolean anyTooLarge = false;
for(int i=start;i<end;i++) {
- final SegmentInfo info = infos.info(i);
+ final SegmentInfo info = levels[i].info;
anyTooLarge |= (size(info) >= maxMergeSize || sizeDocs(info) >= maxMergeDocs);
}
if (!anyTooLarge) {
if (spec == null)
spec = new MergeSpecification();
- if (verbose())
+ if (verbose()) {
message(" " + start + " to " + end + ": add this merge");
- spec.add(new OneMerge(infos.range(start, end)));
- } else if (verbose())
+ }
+ Arrays.sort(levels, start, end, sortByIndex);
+ final SegmentInfos mergeInfos = new SegmentInfos();
+ for(int i=start;i<end;i++) {
+ mergeInfos.add(levels[i].info);
+ assert infos.contains(levels[i].info);
+ }
+ spec.add(new OneMerge(mergeInfos));
+ } else if (verbose()) {
message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
+ }
start = end;
end = start + mergeFactor;
@@ -599,6 +662,7 @@ public abstract class LogMergePolicy ext
sb.append("calibrateSizeByDeletes=").append(calibrateSizeByDeletes).append(", ");
sb.append("maxMergeDocs=").append(maxMergeDocs).append(", ");
sb.append("useCompoundFile=").append(useCompoundFile);
+ sb.append("requireContiguousMerge=").append(requireContiguousMerge);
sb.append("]");
return sb.toString();
}
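
With requireContiguousMerge left at its default of false, findMerges now sorts segments by level so that closely sized segments become adjacent candidates even when scattered through the index, then re-sorts each selected window by original index, since the merge itself must preserve docID order. A small sketch of that select-then-restore step (a hypothetical record in place of SegmentInfoAndLevel):

  import java.util.ArrayList;
  import java.util.Comparator;
  import java.util.List;

  // Hypothetical, simplified stand-in for the level-sorted selection:
  final class NonContiguousSelectSketch {
    record Seg(String name, double level, int index) {}

    static List<Seg> pickMerge(List<Seg> segs, int mergeFactor) {
      List<Seg> byLevel = new ArrayList<>(segs);
      // Largest level first, so similarly sized segments line up even
      // when they are not contiguous in the index:
      byLevel.sort(Comparator.comparingDouble(Seg::level).reversed());
      List<Seg> window =
          new ArrayList<>(byLevel.subList(0, Math.min(mergeFactor, byLevel.size())));
      // The merge must keep docID order, so restore the original index order:
      window.sort(Comparator.comparingInt(Seg::index));
      return window;
    }

    public static void main(String[] args) {
      List<Seg> segs = List.of(
          new Seg("_0", 5.0, 0), new Seg("_1", 1.0, 1),
          new Seg("_2", 5.1, 2), new Seg("_3", 4.9, 3));
      // Picks the similarly sized _0, _2, _3 (skipping tiny _1) and
      // returns them in index order: [_0, _2, _3]
      System.out.println(pickMerge(segs, 3));
    }
  }
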
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java?rev=1065095&r1=1065094&r2=1065095&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java Sat Jan 29 19:48:56 2011
@@ -94,6 +94,10 @@ public final class SegmentInfo {
// specific versions afterwards ("3.0", "3.1" etc.).
// see Constants.LUCENE_MAIN_VERSION.
private String version;
+
+ // NOTE: only used in-RAM by IW to track buffered deletes;
+ // this is never written to/read from the Directory
+ private long bufferedDeletesGen;
public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile,
boolean hasProx, SegmentCodecs segmentCodecs, boolean hasVectors) {
@@ -679,5 +683,12 @@ public final class SegmentInfo {
public String getVersion() {
return version;
}
-
+
+ long getBufferedDeletesGen() {
+ return bufferedDeletesGen;
+ }
+
+ void setBufferedDeletesGen(long v) {
+ bufferedDeletesGen = v;
+ }
}
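
The bufferedDeletesGen accessors above are the hook for the new BufferedDeletesStream: delete packets and flushed segments are stamped from one monotonically increasing generation counter, and a packet only has to be applied to segments whose stamp is older than (or equal to) the packet's. A minimal sketch of that bookkeeping, using simplified stand-in types rather than the real BufferedDeletesStream API:

public class DeletesGenSketch {
  private long nextGen = 1;

  static class Seg { long bufferedDeletesGen; }  // mirrors SegmentInfo
  static class DeletePacket { long gen; }

  // a freshly flushed segment has already applied everything buffered so far
  void onSegmentFlush(Seg seg) { seg.bufferedDeletesGen = nextGen++; }

  // newly buffered deletes get the current generation
  void onDeletesBuffered(DeletePacket packet) { packet.gen = nextGen++; }

  // the packet only needs to be applied to segments flushed before it
  boolean mustApply(DeletePacket packet, Seg seg) {
    return packet.gen >= seg.bufferedDeletesGen;
  }
}

Since the field is stamped in RAM only (as the comment in the hunk says), nothing about the index file format changes.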
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java?rev=1065095&r1=1065094&r2=1065095&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java Sat Jan 29 19:48:56 2011
@@ -91,8 +91,8 @@ public class TestMergeSchedulerExternal
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer()).setMergeScheduler(new MyMergeScheduler())
- .setMaxBufferedDocs(2).setRAMBufferSizeMB(
- IndexWriterConfig.DISABLE_AUTO_FLUSH));
+ .setMaxBufferedDocs(2).setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)
+ .setMergePolicy(newLogMergePolicy()));
LogMergePolicy logMP = (LogMergePolicy) writer.getConfig().getMergePolicy();
logMP.setMergeFactor(10);
for(int i=0;i<20;i++)
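
This test pins newLogMergePolicy() explicitly because it immediately downcasts getConfig().getMergePolicy() to LogMergePolicy; with this commit the default policy handed out by the test framework can evidently be something else (such as MockRandomMergePolicy, added below). The other test hunks in this commit use the second of the two safe patterns sketched here (LogMergePolicySafety is a hypothetical name):

import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.MergePolicy;

public class LogMergePolicySafety {
  // (1) pin: build the config with an explicit LogMergePolicy up front,
  // after which a cast like the one in the test above cannot fail:
  //   conf.setMergePolicy(newLogMergePolicy());

  // (2) guard: only touch LogMergePolicy-specific knobs when the
  // randomly chosen policy actually is one
  static void setCompound(MergePolicy mp, boolean useCompoundFile) {
    if (mp instanceof LogMergePolicy) {
      ((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile);
    }
  }
}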
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestSearch.java?rev=1065095&r1=1065094&r2=1065095&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestSearch.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestSearch.java Sat Jan 29 19:48:56 2011
@@ -74,8 +74,11 @@ public class TestSearch extends LuceneTe
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
- LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
- lmp.setUseCompoundFile(useCompoundFile);
+ MergePolicy mp = conf.getMergePolicy();
+ if (mp instanceof LogMergePolicy) {
+ ((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile);
+ }
+
IndexWriter writer = new IndexWriter(directory, conf);
String[] docs = {
@@ -90,6 +93,7 @@ public class TestSearch extends LuceneTe
for (int j = 0; j < docs.length; j++) {
Document d = new Document();
d.add(newField("contents", docs[j], Field.Store.YES, Field.Index.ANALYZED));
+ d.add(newField("id", ""+j, Field.Index.NOT_ANALYZED_NO_NORMS));
writer.addDocument(d);
}
writer.close();
@@ -106,6 +110,10 @@ public class TestSearch extends LuceneTe
};
ScoreDoc[] hits = null;
+ Sort sort = new Sort(new SortField[] {
+ SortField.FIELD_SCORE,
+ new SortField("id", SortField.INT)});
+
QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "contents", analyzer);
parser.setPhraseSlop(4);
for (int j = 0; j < queries.length; j++) {
@@ -115,7 +123,7 @@ public class TestSearch extends LuceneTe
System.out.println("TEST: query=" + query);
}
- hits = searcher.search(query, null, 1000).scoreDocs;
+ hits = searcher.search(query, null, 1000, sort).scoreDocs;
out.println(hits.length + " total results");
for (int i = 0 ; i < hits.length && i < 10; i++) {
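
The new sort matters because non-contiguous (and random) merges permute docIDs: hits with equal score used to come back in a stable docID order, and now they need an application-level tie-break to stay comparable against the expected output. A sketch of the pattern, assuming every document carries the indexed "id" field added above (DeterministicHits is a hypothetical name):

import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;

public class DeterministicHits {
  static ScoreDoc[] searchStable(IndexSearcher searcher, Query query)
      throws Exception {
    Sort sort = new Sort(new SortField[] {
        SortField.FIELD_SCORE,                 // primary: relevance
        new SortField("id", SortField.INT)});  // tie-break: stable doc id
    return searcher.search(query, null, 1000, sort).scoreDocs;
  }
}

TestSearchForDuplicates below applies the same guard-plus-sort treatment.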
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java?rev=1065095&r1=1065094&r2=1065095&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java Sat Jan 29 19:48:56 2011
@@ -80,8 +80,10 @@ public class TestSearchForDuplicates ext
Directory directory = newDirectory();
Analyzer analyzer = new MockAnalyzer();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
- LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
- lmp.setUseCompoundFile(useCompoundFiles);
+ final MergePolicy mp = conf.getMergePolicy();
+ if (mp instanceof LogMergePolicy) {
+ ((LogMergePolicy) mp).setUseCompoundFile(useCompoundFiles);
+ }
IndexWriter writer = new IndexWriter(directory, conf);
if (VERBOSE) {
System.out.println("TEST: now build index");
@@ -93,9 +95,6 @@ public class TestSearchForDuplicates ext
for (int j = 0; j < MAX_DOCS; j++) {
Document d = new Document();
d.add(newField(PRIORITY_FIELD, HIGH_PRIORITY, Field.Store.YES, Field.Index.ANALYZED));
-
- // NOTE: this ID_FIELD produces no tokens since
- // MockAnalyzer discards numbers
d.add(newField(ID_FIELD, Integer.toString(j), Field.Store.YES, Field.Index.ANALYZED));
writer.addDocument(d);
}
@@ -112,7 +111,11 @@ public class TestSearchForDuplicates ext
System.out.println("TEST: search query=" + query);
}
- ScoreDoc[] hits = searcher.search(query, null, MAX_DOCS).scoreDocs;
+ final Sort sort = new Sort(new SortField[] {
+ SortField.FIELD_SCORE,
+ new SortField(ID_FIELD, SortField.INT)});
+
+ ScoreDoc[] hits = searcher.search(query, null, MAX_DOCS, sort).scoreDocs;
printHits(out, hits, searcher);
checkHits(hits, MAX_DOCS, searcher);
@@ -127,7 +130,7 @@ public class TestSearchForDuplicates ext
query = parser.parse(HIGH_PRIORITY + " OR " + MED_PRIORITY);
out.println("Query: " + query.toString(PRIORITY_FIELD));
- hits = searcher.search(query, null, MAX_DOCS).scoreDocs;
+ hits = searcher.search(query, null, MAX_DOCS, sort).scoreDocs;
printHits(out, hits, searcher);
checkHits(hits, MAX_DOCS, searcher);
@@ -149,7 +152,7 @@ public class TestSearchForDuplicates ext
private void checkHits(ScoreDoc[] hits, int expectedCount, IndexSearcher searcher) throws IOException {
assertEquals("total results", expectedCount, hits.length);
for (int i = 0 ; i < hits.length; i++) {
- if ( i < 10 || (i > 94 && i < 105) ) {
+ if (i < 10 || (i > 94 && i < 105) ) {
Document d = searcher.doc(hits[i].doc);
assertEquals("check " + i, String.valueOf(i), d.get(ID_FIELD));
}
Added: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/MockRandomMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/MockRandomMergePolicy.java?rev=1065095&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/MockRandomMergePolicy.java (added)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/MockRandomMergePolicy.java Sat Jan 29 19:48:56 2011
@@ -0,0 +1,93 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.lucene.util._TestUtil;
+
+public class MockRandomMergePolicy extends MergePolicy {
+ private final Random random;
+
+ public MockRandomMergePolicy(Random random) {
+ // fork a private random, since we are called
+ // unpredictably from threads:
+ this.random = new Random(random.nextLong());
+ }
+
+ @Override
+ public MergeSpecification findMerges(SegmentInfos segmentInfos) {
+ MergeSpecification mergeSpec = null;
+ //System.out.println("MRMP: findMerges sis=" + segmentInfos);
+
+ if (segmentInfos.size() > 1 && random.nextInt(5) == 3) {
+
+ SegmentInfos segmentInfos2 = new SegmentInfos();
+ segmentInfos2.addAll(segmentInfos);
+ Collections.shuffle(segmentInfos2, random);
+
+ // TODO: sometimes make more than 1 merge?
+ mergeSpec = new MergeSpecification();
+ final int segsToMerge = _TestUtil.nextInt(random, 1, segmentInfos.size());
+ mergeSpec.add(new OneMerge(segmentInfos2.range(0, segsToMerge)));
+ }
+
+ return mergeSpec;
+ }
+
+ @Override
+ public MergeSpecification findMergesForOptimize(
+ SegmentInfos segmentInfos, int maxSegmentCount, Set<SegmentInfo> segmentsToOptimize)
+ throws CorruptIndexException, IOException {
+
+ //System.out.println("MRMP: findMergesForOptimize sis=" + segmentInfos);
+ MergeSpecification mergeSpec = null;
+ if (segmentInfos.size() > 1 || (segmentInfos.size() == 1 && segmentInfos.info(0).hasDeletions())) {
+ mergeSpec = new MergeSpecification();
+ SegmentInfos segmentInfos2 = new SegmentInfos();
+ segmentInfos2.addAll(segmentInfos);
+ Collections.shuffle(segmentInfos2, random);
+ int upto = 0;
+ while(upto < segmentInfos.size()) {
+ int inc = _TestUtil.nextInt(random, 1, segmentInfos.size()-upto);
+ mergeSpec.add(new OneMerge(segmentInfos2.range(upto, upto+inc)));
+ upto += inc;
+ }
+ }
+ return mergeSpec;
+ }
+
+ @Override
+ public MergeSpecification findMergesToExpungeDeletes(
+ SegmentInfos segmentInfos)
+ throws CorruptIndexException, IOException {
+ return findMerges(segmentInfos);
+ }
+
+ @Override
+ public void close() {
+ }
+
+ @Override
+ public boolean useCompoundFile(SegmentInfos infos, SegmentInfo mergedInfo) throws IOException {
+ return random.nextBoolean();
+ }
+}
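
MockRandomMergePolicy stresses the new non-contiguous code path directly: it shuffles the live segments, proposes merges over arbitrary subsets, and randomizes useCompoundFile per merge. A hedged sketch of installing it in a test, assuming the LuceneTestCase helpers (random, newDirectory, newIndexWriterConfig, newField) visible in the tests elsewhere in this commit; the class name is hypothetical:

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MockRandomMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

public class TestWithRandomMerges extends LuceneTestCase {
  public void testRandomMerges() throws Exception {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer())
        .setMergePolicy(new MockRandomMergePolicy(random)));
    for (int i = 0; i < 100; i++) {
      Document doc = new Document();
      doc.add(newField("id", "" + i, Field.Store.YES, Field.Index.NOT_ANALYZED));
      writer.addDocument(doc);
    }
    writer.optimize();  // drives findMergesForOptimize over shuffled segments
    writer.close();
    dir.close();
  }
}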
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java?rev=1065095&r1=1065094&r2=1065095&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java Sat Jan 29 19:48:56 2011
@@ -452,6 +452,7 @@ public class TestAddIndexes extends Luce
setMaxBufferedDocs(100).
setMergePolicy(newLogMergePolicy(10))
);
+ writer.setInfoStream(VERBOSE ? System.out : null);
writer.addIndexes(aux);
assertEquals(30, writer.maxDoc());
assertEquals(3, writer.getSegmentCount());
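
The one-line setInfoStream additions here and in the following tests are all the same debugging aid: with randomized merge policies a failure may only reproduce under one seed, so the writer's internal log is routed to stdout when the suite runs verbose. A tiny sketch of the pattern (VerboseTracing is a hypothetical name; VERBOSE is the LuceneTestCase flag):

import org.apache.lucene.index.IndexWriter;

public class VerboseTracing {
  static void enableTracing(IndexWriter writer, boolean verbose) {
    // route IndexWriter's internal log to stdout only when asked for
    writer.setInfoStream(verbose ? System.out : null);
    if (verbose) {
      System.out.println("TEST: tracing enabled");
    }
  }
}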
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java?rev=1065095&r1=1065094&r2=1065095&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java Sat Jan 29 19:48:56 2011
@@ -131,6 +131,7 @@ public class TestAtomicUpdate extends Lu
.setMaxBufferedDocs(7);
((LogMergePolicy) conf.getMergePolicy()).setMergeFactor(3);
IndexWriter writer = new MockIndexWriter(directory, conf);
+ writer.setInfoStream(VERBOSE ? System.out : null);
// Establish a base index of 100 docs:
for(int i=0;i<100;i++) {
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java?rev=1065095&r1=1065094&r2=1065095&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java Sat Jan 29 19:48:56 2011
@@ -132,11 +132,15 @@ public class TestConcurrentMergeSchedule
IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer())
.setMergePolicy(mp));
+ writer.setInfoStream(VERBOSE ? System.out : null);
Document doc = new Document();
Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
doc.add(idField);
for(int i=0;i<10;i++) {
+ if (VERBOSE) {
+ System.out.println("\nTEST: cycle");
+ }
for(int j=0;j<100;j++) {
idField.setValue(Integer.toString(i*100+j));
writer.addDocument(doc);
@@ -144,6 +148,9 @@ public class TestConcurrentMergeSchedule
int delID = i;
while(delID < 100*(1+i)) {
+ if (VERBOSE) {
+ System.out.println("TEST: del " + delID);
+ }
writer.deleteDocuments(new Term("id", ""+delID));
delID += 10;
}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java?rev=1065095&r1=1065094&r2=1065095&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java Sat Jan 29 19:48:56 2011
@@ -119,6 +119,9 @@ public class TestDeletionPolicy extends
}
public void onInit(List<? extends IndexCommit> commits) throws IOException {
+ if (VERBOSE) {
+ System.out.println("TEST: onInit");
+ }
verifyCommitOrder(commits);
numOnInit++;
// do no deletions on init
@@ -126,6 +129,9 @@ public class TestDeletionPolicy extends
}
public void onCommit(List<? extends IndexCommit> commits) throws IOException {
+ if (VERBOSE) {
+ System.out.println("TEST: onCommit");
+ }
verifyCommitOrder(commits);
doDeletes(commits, true);
}
@@ -200,8 +206,10 @@ public class TestDeletionPolicy extends
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer())
.setIndexDeletionPolicy(policy);
- LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
- lmp.setUseCompoundFile(true);
+ MergePolicy mp = conf.getMergePolicy();
+ if (mp instanceof LogMergePolicy) {
+ ((LogMergePolicy) mp).setUseCompoundFile(true);
+ }
IndexWriter writer = new IndexWriter(dir, conf);
writer.close();
@@ -215,8 +223,10 @@ public class TestDeletionPolicy extends
conf = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer()).setOpenMode(
OpenMode.APPEND).setIndexDeletionPolicy(policy);
- lmp = (LogMergePolicy) conf.getMergePolicy();
- lmp.setUseCompoundFile(true);
+ mp = conf.getMergePolicy();
+ if (mp instanceof LogMergePolicy) {
+ ((LogMergePolicy) mp).setUseCompoundFile(true);
+ }
writer = new IndexWriter(dir, conf);
for(int j=0;j<17;j++) {
addDoc(writer);
@@ -280,6 +290,10 @@ public class TestDeletionPolicy extends
public void testKeepAllDeletionPolicy() throws IOException {
for(int pass=0;pass<2;pass++) {
+ if (VERBOSE) {
+ System.out.println("TEST: cycle pass=" + pass);
+ }
+
boolean useCompoundFile = (pass % 2) != 0;
// Never deletes a commit
@@ -292,34 +306,48 @@ public class TestDeletionPolicy extends
TEST_VERSION_CURRENT, new MockAnalyzer())
.setIndexDeletionPolicy(policy).setMaxBufferedDocs(10)
.setMergeScheduler(new SerialMergeScheduler());
- LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
- lmp.setUseCompoundFile(useCompoundFile);
- lmp.setMergeFactor(10);
+ MergePolicy mp = conf.getMergePolicy();
+ if (mp instanceof LogMergePolicy) {
+ ((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile);
+ }
IndexWriter writer = new IndexWriter(dir, conf);
for(int i=0;i<107;i++) {
addDoc(writer);
}
writer.close();
- conf = newIndexWriterConfig(TEST_VERSION_CURRENT,
- new MockAnalyzer()).setOpenMode(
- OpenMode.APPEND).setIndexDeletionPolicy(policy);
- lmp = (LogMergePolicy) conf.getMergePolicy();
- lmp.setUseCompoundFile(useCompoundFile);
- writer = new IndexWriter(dir, conf);
- writer.optimize();
- writer.close();
-
- assertEquals(1, policy.numOnInit);
+ final boolean isOptimized;
+ {
+ IndexReader r = IndexReader.open(dir);
+ isOptimized = r.isOptimized();
+ r.close();
+ }
+ if (!isOptimized) {
+ conf = newIndexWriterConfig(TEST_VERSION_CURRENT,
+ new MockAnalyzer()).setOpenMode(
+ OpenMode.APPEND).setIndexDeletionPolicy(policy);
+ mp = conf.getMergePolicy();
+ if (mp instanceof LogMergePolicy) {
+ ((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile);
+ }
+ if (VERBOSE) {
+ System.out.println("TEST: open writer for optimize");
+ }
+ writer = new IndexWriter(dir, conf);
+ writer.setInfoStream(VERBOSE ? System.out : null);
+ writer.optimize();
+ writer.close();
+ }
+ assertEquals(isOptimized ? 0:1, policy.numOnInit);
// If we are not auto committing then there should
// be exactly 2 commits (one per close above):
- assertEquals(2, policy.numOnCommit);
+ assertEquals(1 + (isOptimized ? 0:1), policy.numOnCommit);
// Test listCommits
Collection<IndexCommit> commits = IndexReader.listCommits(dir);
// 2 from closing writer
- assertEquals(2, commits.size());
+ assertEquals(1 + (isOptimized ? 0:1), commits.size());
// Make sure we can open a reader on each commit:
for (final IndexCommit commit : commits) {
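
The isOptimized probe above is the recurring fix in this test: with a randomized merge policy the index may already be fully merged when the writer closes, so the old hard-coded commit counts no longer hold. The test reads the state first and folds it into the expected counts. A distilled sketch of the probe-then-assert pattern (OptimizeAwareAsserts is a hypothetical name; dir and conf play the same roles as in the hunk above):

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;

public class OptimizeAwareAsserts {
  // returns 0 or 1: the number of extra commits the optimize path created
  static int optimizeIfNeeded(Directory dir, IndexWriterConfig conf)
      throws Exception {
    IndexReader r = IndexReader.open(dir);
    final boolean isOptimized = r.isOptimized();
    r.close();
    if (!isOptimized) {
      IndexWriter w = new IndexWriter(dir, conf);
      w.optimize();
      w.close();  // this close is the extra commit
    }
    return isOptimized ? 0 : 1;  // fold into the assertion arithmetic
  }
}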
@@ -480,8 +508,10 @@ public class TestDeletionPolicy extends
TEST_VERSION_CURRENT, new MockAnalyzer())
.setOpenMode(OpenMode.CREATE).setIndexDeletionPolicy(policy)
.setMaxBufferedDocs(10);
- LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
- lmp.setUseCompoundFile(useCompoundFile);
+ MergePolicy mp = conf.getMergePolicy();
+ if (mp instanceof LogMergePolicy) {
+ ((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile);
+ }
IndexWriter writer = new IndexWriter(dir, conf);
for(int i=0;i<107;i++) {
addDoc(writer);
@@ -490,8 +520,10 @@ public class TestDeletionPolicy extends
conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())
.setOpenMode(OpenMode.APPEND).setIndexDeletionPolicy(policy);
- lmp = (LogMergePolicy) conf.getMergePolicy();
- lmp.setUseCompoundFile(useCompoundFile);
+ mp = conf.getMergePolicy();
+ if (mp instanceof LogMergePolicy) {
+ ((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile);
+ }
writer = new IndexWriter(dir, conf);
writer.optimize();
writer.close();
@@ -529,8 +561,10 @@ public class TestDeletionPolicy extends
TEST_VERSION_CURRENT, new MockAnalyzer())
.setOpenMode(OpenMode.CREATE).setIndexDeletionPolicy(policy)
.setMaxBufferedDocs(10);
- LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
- lmp.setUseCompoundFile(useCompoundFile);
+ MergePolicy mp = conf.getMergePolicy();
+ if (mp instanceof LogMergePolicy) {
+ ((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile);
+ }
IndexWriter writer = new IndexWriter(dir, conf);
for(int i=0;i<17;i++) {
addDoc(writer);
@@ -586,24 +620,34 @@ public class TestDeletionPolicy extends
IndexWriterConfig conf = newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer())
.setOpenMode(OpenMode.CREATE).setIndexDeletionPolicy(policy);
- LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
- lmp.setUseCompoundFile(useCompoundFile);
+ MergePolicy mp = conf.getMergePolicy();
+ if (mp instanceof LogMergePolicy) {
+ ((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile);
+ }
IndexWriter writer = new IndexWriter(dir, conf);
writer.close();
Term searchTerm = new Term("content", "aaa");
Query query = new TermQuery(searchTerm);
for(int i=0;i<N+1;i++) {
+ if (VERBOSE) {
+ System.out.println("\nTEST: cycle i=" + i);
+ }
conf = newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer())
.setOpenMode(OpenMode.APPEND).setIndexDeletionPolicy(policy);
- lmp = (LogMergePolicy) conf.getMergePolicy();
- lmp.setUseCompoundFile(useCompoundFile);
+ mp = conf.getMergePolicy();
+ if (mp instanceof LogMergePolicy) {
+ ((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile);
+ }
writer = new IndexWriter(dir, conf);
for(int j=0;j<17;j++) {
addDoc(writer);
}
// this is a commit
+ if (VERBOSE) {
+ System.out.println("TEST: close writer");
+ }
writer.close();
IndexReader reader = IndexReader.open(dir, policy, false);
reader.deleteDocument(3*i+1);
@@ -612,20 +656,28 @@ public class TestDeletionPolicy extends
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals(16*(1+i), hits.length);
// this is a commit
+ if (VERBOSE) {
+ System.out.println("TEST: close reader numOnCommit=" + policy.numOnCommit);
+ }
reader.close();
searcher.close();
}
conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer())
.setOpenMode(OpenMode.APPEND).setIndexDeletionPolicy(policy);
- lmp = (LogMergePolicy) conf.getMergePolicy();
- lmp.setUseCompoundFile(useCompoundFile);
+ mp = conf.getMergePolicy();
+ if (mp instanceof LogMergePolicy) {
+ ((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile);
+ }
+ IndexReader r = IndexReader.open(dir);
+ final boolean wasOptimized = r.isOptimized();
+ r.close();
writer = new IndexWriter(dir, conf);
writer.optimize();
// this is a commit
writer.close();
assertEquals(2*(N+1)+1, policy.numOnInit);
- assertEquals(2*(N+2), policy.numOnCommit);
+ assertEquals(2*(N+2) - (wasOptimized ? 1:0), policy.numOnCommit);
IndexSearcher searcher = new IndexSearcher(dir, false);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
@@ -692,8 +744,10 @@ public class TestDeletionPolicy extends
TEST_VERSION_CURRENT, new MockAnalyzer())
.setOpenMode(OpenMode.CREATE).setIndexDeletionPolicy(policy)
.setMaxBufferedDocs(10);
- LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
- lmp.setUseCompoundFile(useCompoundFile);
+ MergePolicy mp = conf.getMergePolicy();
+ if (mp instanceof LogMergePolicy) {
+ ((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile);
+ }
IndexWriter writer = new IndexWriter(dir, conf);
writer.close();
Term searchTerm = new Term("content", "aaa");
@@ -705,8 +759,10 @@ public class TestDeletionPolicy extends
TEST_VERSION_CURRENT, new MockAnalyzer())
.setOpenMode(OpenMode.APPEND).setIndexDeletionPolicy(policy)
.setMaxBufferedDocs(10);
- lmp = (LogMergePolicy) conf.getMergePolicy();
- lmp.setUseCompoundFile(useCompoundFile);
+ mp = conf.getMergePolicy();
+ if (mp instanceof LogMergePolicy) {
+ ((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile);
+ }
writer = new IndexWriter(dir, conf);
for(int j=0;j<17;j++) {
addDoc(writer);