You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2011/04/02 17:47:15 UTC
svn commit: r1088051 [1/2] - in /lucene/dev/trunk: lucene/
lucene/contrib/ant/src/java/org/apache/lucene/ant/
lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/
lucene/contrib/misc/src/test/org/apache/lucene/index/ lucene/contri...
Author: mikemccand
Date: Sat Apr 2 15:47:12 2011
New Revision: 1088051
URL: http://svn.apache.org/viewvc?rev=1088051&view=rev
Log:
LUCENE-1076: new TieredMergePolicy
Added:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java (with props)
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestTieredMergePolicy.java (with props)
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/MIGRATE.txt
lucene/dev/trunk/lucene/contrib/ant/src/java/org/apache/lucene/ant/IndexTask.java
lucene/dev/trunk/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java
lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java
lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java
lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java
lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java
lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java
lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java
lucene/dev/trunk/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java
lucene/dev/trunk/lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MergePolicy.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentReader.java
lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java
lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java
lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocTermOrds.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterMerging.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestNRTThreads.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestNorms.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestOmitNorms.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestOmitTf.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestPayloads.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestSegmentTermDocs.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestBoolean2.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestDocBoost.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestExplanations.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFieldCache.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFilteredSearch.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery2.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestSort.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestSpanQueryFilter.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestSubScorerFreqs.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestTermScorer.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestTermVectors.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestWildcard.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/cache/TestEntryCreators.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/function/FunctionTestSetup.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/function/TestValueSource.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/TestBasics.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/store/TestMultiMMap.java
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Sat Apr 2 15:47:12 2011
@@ -157,11 +157,10 @@ Changes in Runtime Behavior
* LUCENE-2720: IndexWriter throws IndexFormatTooOldException on open, rather
than later when e.g. a merge starts. (Shai Erera, Mike McCandless, Uwe Schindler)
-* LUCENE-1076: The default merge policy is now able to merge
- non-contiguous segments, which means docIDs no longer necessarily
- say "in order". If this is a problem then you can use either of the
- LogMergePolicy impls, and call setRequireContiguousMerge(true).
- (Mike McCandless)
+* LUCENE-1076: The default merge policy (TieredMergePolicy) is now
+ able to merge non-contiguous segments, which means docIDs no longer
+ necessarily stay "in order". If this is a problem then you can use
+ either of the LogMergePolicy impls. (Mike McCandless)
* LUCENE-2881: FieldInfos is now tracked per segment. Before it was tracked
per IndexWriter session, which resulted in FieldInfos that had the FieldInfo
@@ -333,7 +332,7 @@ New features
* LUCENE-2862: Added TermsEnum.totalTermFreq() and
Terms.getSumTotalTermFreq(). (Mike McCandless, Robert Muir)
-
+
* LUCENE-3001: Added TrieFieldHelper to write solr compatible numeric
fields without the solr dependency. (ryan)
Modified: lucene/dev/trunk/lucene/MIGRATE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/MIGRATE.txt?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/MIGRATE.txt (original)
+++ lucene/dev/trunk/lucene/MIGRATE.txt Sat Apr 2 15:47:12 2011
@@ -356,3 +356,9 @@ LUCENE-1458, LUCENE-2111: Flexible Index
field as a parameter, this is removed due to the fact the entire Similarity (all methods)
can now be configured per-field.
Methods that apply to the entire query such as coord() and queryNorm() exist in SimilarityProvider.
+
+* LUCENE-1076: TieredMergePolicy is now the default merge policy.
+ It's able to merge non-contiguous segments; this may cause problems
+ for applications that rely on Lucene's internal document ID
+ assignment. If so, you should instead use LogByteSize/DocMergePolicy
+ during indexing.
Modified: lucene/dev/trunk/lucene/contrib/ant/src/java/org/apache/lucene/ant/IndexTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/ant/src/java/org/apache/lucene/ant/IndexTask.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/ant/src/java/org/apache/lucene/ant/IndexTask.java (original)
+++ lucene/dev/trunk/lucene/contrib/ant/src/java/org/apache/lucene/ant/IndexTask.java Sat Apr 2 15:47:12 2011
@@ -39,7 +39,7 @@ import org.apache.lucene.document.Docume
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
@@ -285,9 +285,9 @@ public class IndexTask extends Task {
IndexWriterConfig conf = new IndexWriterConfig(
Version.LUCENE_CURRENT, analyzer).setOpenMode(
create ? OpenMode.CREATE : OpenMode.APPEND);
- LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
- lmp.setUseCompoundFile(useCompoundIndex);
- lmp.setMergeFactor(mergeFactor);
+ TieredMergePolicy tmp = (TieredMergePolicy) conf.getMergePolicy();
+ tmp.setUseCompoundFile(useCompoundIndex);
+ tmp.setMaxMergeAtOnce(mergeFactor);
IndexWriter writer = new IndexWriter(dir, conf);
int totalFiles = 0;
int totalIndexed = 0;
Modified: lucene/dev/trunk/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java (original)
+++ lucene/dev/trunk/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java Sat Apr 2 15:47:12 2011
@@ -65,7 +65,7 @@ public class TestIndicesEquals extends L
// create dir data
IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+ TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
for (int i = 0; i < 20; i++) {
Document document = new Document();
@@ -91,7 +91,7 @@ public class TestIndicesEquals extends L
// create dir data
IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+ TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
indexWriter.setInfoStream(VERBOSE ? System.out : null);
if (VERBOSE) {
System.out.println("TEST: make test index");
Modified: lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java (original)
+++ lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java Sat Apr 2 15:47:12 2011
@@ -61,7 +61,7 @@ public class TestFieldNormModifier exten
super.setUp();
store = newDirectory();
IndexWriter writer = new IndexWriter(store, newIndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+ TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
for (int i = 0; i < NUM_DOCS; i++) {
Document d = new Document();
Modified: lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java (original)
+++ lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java Sat Apr 2 15:47:12 2011
@@ -32,7 +32,7 @@ public class TestMultiPassIndexSplitter
public void setUp() throws Exception {
super.setUp();
dir = newDirectory();
- IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
Document doc;
for (int i = 0; i < NUM_DOCS; i++) {
doc = new Document();
Modified: lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java (original)
+++ lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java Sat Apr 2 15:47:12 2011
@@ -30,7 +30,7 @@ import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
@@ -137,7 +137,7 @@ public class TestAppendingCodec extends
IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_40, new MockAnalyzer());
cfg.setCodecProvider(new AppendingCodecProvider());
- ((LogMergePolicy)cfg.getMergePolicy()).setUseCompoundFile(false);
+ ((TieredMergePolicy)cfg.getMergePolicy()).setUseCompoundFile(false);
IndexWriter writer = new IndexWriter(dir, cfg);
Document doc = new Document();
doc.add(newField("f", text, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
Modified: lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java (original)
+++ lucene/dev/trunk/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java Sat Apr 2 15:47:12 2011
@@ -66,7 +66,7 @@ public class TestLengthNormModifier exte
super.setUp();
store = newDirectory();
IndexWriter writer = new IndexWriter(store, newIndexWriterConfig(
- TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+ TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
for (int i = 0; i < NUM_DOCS; i++) {
Document d = new Document();
Modified: lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java (original)
+++ lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/DuplicateFilterTest.java Sat Apr 2 15:47:12 2011
@@ -43,7 +43,7 @@ public class DuplicateFilterTest extends
public void setUp() throws Exception {
super.setUp();
directory = newDirectory();
- RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+ RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
//Add series of docs with filterable fields : url, text and dates flags
addDoc(writer, "http://lucene.apache.org", "lucene 1.4.3 available", "20040101");
Modified: lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java (original)
+++ lucene/dev/trunk/lucene/contrib/queries/src/test/org/apache/lucene/search/FuzzyLikeThisQueryTest.java Sat Apr 2 15:47:12 2011
@@ -40,7 +40,7 @@ public class FuzzyLikeThisQueryTest exte
public void setUp() throws Exception {
super.setUp();
directory = newDirectory();
- RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+ RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
//Add series of docs with misspelt names
addDoc(writer, "jonathon smythe","1");
Modified: lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java (original)
+++ lucene/dev/trunk/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java Sat Apr 2 15:47:12 2011
@@ -29,7 +29,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Terms;
@@ -45,7 +45,6 @@ import org.apache.lucene.store.Directory
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.Version;
-import org.apache.lucene.util.VirtualMethod;
/**
* <p>
@@ -508,7 +507,7 @@ public class SpellChecker implements jav
ensureOpen();
final Directory dir = this.spellIndex;
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT)).setRAMBufferSizeMB(ramMB));
- ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(mergeFactor);
+ ((TieredMergePolicy) writer.getConfig().getMergePolicy()).setMaxMergeAtOnce(mergeFactor);
IndexSearcher indexSearcher = obtainSearcher();
final List<TermsEnum> termsEnums = new ArrayList<TermsEnum>();
Modified: lucene/dev/trunk/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java (original)
+++ lucene/dev/trunk/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java Sat Apr 2 15:47:12 2011
@@ -36,7 +36,7 @@ import org.apache.lucene.document.Docume
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
@@ -250,7 +250,7 @@ public class Syns2Index
// override the specific index if it already exists
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
Version.LUCENE_CURRENT, ana).setOpenMode(OpenMode.CREATE));
- ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(true); // why?
+ ((TieredMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(true); // why?
Iterator<String> i1 = word2Nums.keySet().iterator();
while (i1.hasNext()) // for each word
{
Modified: lucene/dev/trunk/lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java (original)
+++ lucene/dev/trunk/lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java Sat Apr 2 15:47:12 2011
@@ -29,6 +29,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
public class TestWordnet extends LuceneTestCase {
private IndexSearcher searcher;
@@ -42,6 +43,7 @@ public class TestWordnet extends LuceneT
// create a temporary synonym index
File testFile = getDataFile("testSynonyms.txt");
String commandLineArgs[] = { testFile.getAbsolutePath(), storePathName };
+ _TestUtil.rmDir(new File(storePathName));
try {
Syns2Index.main(commandLineArgs);
@@ -71,8 +73,12 @@ public class TestWordnet extends LuceneT
@Override
public void tearDown() throws Exception {
- searcher.close();
- dir.close();
+ if (searcher != null) {
+ searcher.close();
+ }
+ if (dir != null) {
+ dir.close();
+ }
rmDir(storePathName); // delete our temporary synonym index
super.tearDown();
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java Sat Apr 2 15:47:12 2011
@@ -871,7 +871,7 @@ public class IndexWriter implements Clos
* message when maxFieldLength is reached will be printed
* to this.
*/
- public void setInfoStream(PrintStream infoStream) {
+ public void setInfoStream(PrintStream infoStream) throws IOException {
ensureOpen();
this.infoStream = infoStream;
docWriter.setInfoStream(infoStream);
@@ -881,7 +881,7 @@ public class IndexWriter implements Clos
messageState();
}
- private void messageState() {
+ private void messageState() throws IOException {
message("\ndir=" + directory + "\n" +
"index=" + segString() + "\n" +
"version=" + Constants.LUCENE_VERSION + "\n" +
@@ -1640,6 +1640,8 @@ public class IndexWriter implements Clos
throws CorruptIndexException, IOException {
ensureOpen();
+ flush(true, true);
+
if (infoStream != null)
message("expungeDeletes: index now " + segString());
@@ -1712,6 +1714,10 @@ public class IndexWriter implements Clos
* documents, so you must do so yourself if necessary.
* See also {@link #expungeDeletes(boolean)}
*
+ * <p><b>NOTE</b>: this method first flushes a new
+ * segment (if there are indexed documents), and applies
+ * all buffered deletes.
+ *
* <p><b>NOTE</b>: if this method hits an OutOfMemoryError
* you should immediately close the writer. See <a
* href="#OOME">above</a> for details.</p>
@@ -2577,7 +2583,7 @@ public class IndexWriter implements Clos
return docWriter.getNumDocs();
}
- private void ensureValidMerge(MergePolicy.OneMerge merge) {
+ private void ensureValidMerge(MergePolicy.OneMerge merge) throws IOException {
for(SegmentInfo info : merge.segments) {
if (segmentInfos.indexOf(info) == -1) {
throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the current index " + segString(), directory);
@@ -2868,7 +2874,7 @@ public class IndexWriter implements Clos
* are now participating in a merge, and true is
* returned. Else (the merge conflicts) false is
* returned. */
- final synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws MergePolicy.MergeAbortedException {
+ final synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws MergePolicy.MergeAbortedException, IOException {
if (merge.registerDone)
return true;
@@ -2878,10 +2884,8 @@ public class IndexWriter implements Clos
throw new MergePolicy.MergeAbortedException("merge is aborted: " + merge.segString(directory));
}
- final int count = merge.segments.size();
boolean isExternal = false;
- for(int i=0;i<count;i++) {
- final SegmentInfo info = merge.segments.info(i);
+ for(SegmentInfo info : merge.segments) {
if (mergingSegments.contains(info)) {
return false;
}
@@ -2911,12 +2915,15 @@ public class IndexWriter implements Clos
// is running (while synchronized) to avoid race
// condition where two conflicting merges from different
// threads, start
- for(int i=0;i<count;i++) {
- mergingSegments.add(merge.segments.info(i));
+ message("registerMerge merging=" + mergingSegments);
+ for(SegmentInfo info : merge.segments) {
+ message("registerMerge info=" + info);
+ mergingSegments.add(info);
}
// Merge is now registered
merge.registerDone = true;
+
return true;
}
@@ -3001,6 +3008,10 @@ public class IndexWriter implements Clos
message("merge seg=" + merge.info.name);
}
+ // TODO: I think this should no longer be needed (we
+ // now build CFS before adding segment to the infos);
+ // however, on removing it, tests fail for some reason!
+
// Also enroll the merged segment into mergingSegments;
// this prevents it from getting selected for a merge
// after our merge is done but while we are building the
@@ -3039,10 +3050,11 @@ public class IndexWriter implements Clos
// exception inside mergeInit
if (merge.registerDone) {
final SegmentInfos sourceSegments = merge.segments;
- final int end = sourceSegments.size();
- for(int i=0;i<end;i++) {
- mergingSegments.remove(sourceSegments.info(i));
+ for(SegmentInfo info : sourceSegments) {
+ mergingSegments.remove(info);
}
+ // TODO: if we remove the add in _mergeInit, we should
+ // also remove this:
mergingSegments.remove(merge.info);
merge.registerDone = false;
}
@@ -3121,6 +3133,8 @@ public class IndexWriter implements Clos
merge.readers = new ArrayList<SegmentReader>();
merge.readerClones = new ArrayList<SegmentReader>();
+ merge.estimatedMergeBytes = 0;
+
// This is try/finally to make sure merger's readers are
// closed:
boolean success = false;
@@ -3138,6 +3152,13 @@ public class IndexWriter implements Clos
-config.getReaderTermsIndexDivisor());
merge.readers.add(reader);
+ final int readerMaxDoc = reader.maxDoc();
+ if (readerMaxDoc > 0) {
+ final int delCount = reader.numDeletedDocs();
+ final double delRatio = ((double) delCount)/readerMaxDoc;
+ merge.estimatedMergeBytes += info.sizeInBytes(true) * (1.0 - delRatio);
+ }
+
// We clone the segment readers because other
// deletes may come in while we're merging so we
// need readers that will not change
@@ -3239,8 +3260,11 @@ public class IndexWriter implements Clos
merge.info.setUseCompoundFile(true);
}
- final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer();
+ if (infoStream != null) {
+ message(String.format("merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.sizeInBytes(true)/1024./1024., merge.estimatedMergeBytes/1024/1024.));
+ }
+ final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer();
final int termsIndexDivisor;
final boolean loadDocStores;
@@ -3314,21 +3338,41 @@ public class IndexWriter implements Clos
return segmentInfos.size() > 0 ? segmentInfos.info(segmentInfos.size()-1) : null;
}
- public synchronized String segString() {
+ /** @lucene.internal */
+ public synchronized String segString() throws IOException {
return segString(segmentInfos);
}
- private synchronized String segString(SegmentInfos infos) {
+ /** @lucene.internal */
+ public synchronized String segString(SegmentInfos infos) throws IOException {
StringBuilder buffer = new StringBuilder();
final int count = infos.size();
for(int i = 0; i < count; i++) {
if (i > 0) {
buffer.append(' ');
}
- final SegmentInfo info = infos.info(i);
- buffer.append(info.toString(directory, 0));
- if (info.dir != directory)
- buffer.append("**");
+ buffer.append(segString(infos.info(i)));
+ }
+
+ return buffer.toString();
+ }
+
+ public synchronized String segString(SegmentInfo info) throws IOException {
+ StringBuilder buffer = new StringBuilder();
+ SegmentReader reader = readerPool.getIfExists(info);
+ try {
+ if (reader != null) {
+ buffer.append(reader.toString());
+ } else {
+ buffer.append(info.toString(directory, 0));
+ if (info.dir != directory) {
+ buffer.append("**");
+ }
+ }
+ } finally {
+ if (reader != null) {
+ readerPool.release(reader);
+ }
}
return buffer.toString();
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java Sat Apr 2 15:47:12 2011
@@ -156,7 +156,7 @@ public final class IndexWriterConfig imp
indexingChain = DocumentsWriter.defaultIndexingChain;
mergedSegmentWarmer = null;
codecProvider = CodecProvider.getDefault();
- mergePolicy = new LogByteSizeMergePolicy();
+ mergePolicy = new TieredMergePolicy();
maxThreadStates = DEFAULT_MAX_THREAD_STATES;
readerPooling = DEFAULT_READER_POOLING;
readerTermsIndexDivisor = DEFAULT_READER_TERMS_INDEX_DIVISOR;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Sat Apr 2 15:47:12 2011
@@ -20,7 +20,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
@@ -72,7 +71,6 @@ public abstract class LogMergePolicy ext
// out there wrote his own LMP ...
protected long maxMergeSizeForOptimize = Long.MAX_VALUE;
protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
- protected boolean requireContiguousMerge = false;
protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
@@ -111,21 +109,6 @@ public abstract class LogMergePolicy ext
writer.get().message("LMP: " + message);
}
- /** If true, merges must be in-order slice of the
- * segments. If false, then the merge policy is free to
- * pick any segments. The default is false, which is
- * in general more efficient than true since it gives the
- * merge policy more freedom to pick closely sized
- * segments. */
- public void setRequireContiguousMerge(boolean v) {
- requireContiguousMerge = v;
- }
-
- /** See {@link #setRequireContiguousMerge}. */
- public boolean getRequireContiguousMerge() {
- return requireContiguousMerge;
- }
-
/** <p>Returns the number of segments that are merged at
* once and also controls the total number of segments
* allowed to accumulate in the index.</p> */
@@ -378,8 +361,6 @@ public abstract class LogMergePolicy ext
return null;
}
- // TODO: handle non-contiguous merge case differently?
-
// Find the newest (rightmost) segment that needs to
// be optimized (other segments may have been flushed
// since optimize started):
@@ -499,14 +480,6 @@ public abstract class LogMergePolicy ext
}
}
- private static class SortByIndex implements Comparator<SegmentInfoAndLevel> {
- public int compare(SegmentInfoAndLevel o1, SegmentInfoAndLevel o2) {
- return o1.index - o2.index;
- }
- }
-
- private static final SortByIndex sortByIndex = new SortByIndex();
-
/** Checks if any merges are now necessary and returns a
* {@link MergePolicy.MergeSpecification} if so. A merge
* is necessary when there are more than {@link
@@ -532,31 +505,24 @@ public abstract class LogMergePolicy ext
final SegmentInfo info = infos.info(i);
long size = size(info);
- // When we require contiguous merge, we still add the
- // segment to levels to avoid merging "across" a set
- // of segment being merged:
- if (!requireContiguousMerge && mergingSegments.contains(info)) {
- if (verbose()) {
- message("seg " + info.name + " already being merged; skip");
- }
- continue;
- }
-
// Floor tiny segments
if (size < 1) {
size = 1;
}
+
final SegmentInfoAndLevel infoLevel = new SegmentInfoAndLevel(info, (float) Math.log(size)/norm, i);
levels.add(infoLevel);
+
if (verbose()) {
- message("seg " + info.name + " level=" + infoLevel.level + " size=" + size);
+ final long segBytes = sizeBytes(info);
+ String extra = mergingSegments.contains(info) ? " [merging]" : "";
+ if (size >= maxMergeSize) {
+ extra += " [skip: too large]";
+ }
+ message("seg=" + writer.get().segString(info) + " level=" + infoLevel.level + " size=" + String.format("%.3f MB", segBytes/1024/1024.) + extra);
}
}
- if (!requireContiguousMerge) {
- Collections.sort(levels);
- }
-
final float levelFloor;
if (minMergeSize <= 0)
levelFloor = (float) 0.0;
@@ -614,23 +580,29 @@ public abstract class LogMergePolicy ext
int end = start + mergeFactor;
while(end <= 1+upto) {
boolean anyTooLarge = false;
+ boolean anyMerging = false;
for(int i=start;i<end;i++) {
final SegmentInfo info = levels.get(i).info;
anyTooLarge |= (size(info) >= maxMergeSize || sizeDocs(info) >= maxMergeDocs);
+ if (mergingSegments.contains(info)) {
+ anyMerging = true;
+ break;
+ }
}
- if (!anyTooLarge) {
+ if (anyMerging) {
+ // skip
+ } else if (!anyTooLarge) {
if (spec == null)
spec = new MergeSpecification();
- if (verbose()) {
- message(" " + start + " to " + end + ": add this merge");
- }
- Collections.sort(levels.subList(start, end), sortByIndex);
final SegmentInfos mergeInfos = new SegmentInfos();
for(int i=start;i<end;i++) {
mergeInfos.add(levels.get(i).info);
assert infos.contains(levels.get(i).info);
}
+ if (verbose()) {
+ message(" add merge=" + writer.get().segString(mergeInfos) + " start=" + start + " end=" + end);
+ }
spec.add(new OneMerge(mergeInfos));
} else if (verbose()) {
message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
@@ -682,7 +654,7 @@ public abstract class LogMergePolicy ext
sb.append("calibrateSizeByDeletes=").append(calibrateSizeByDeletes).append(", ");
sb.append("maxMergeDocs=").append(maxMergeDocs).append(", ");
sb.append("useCompoundFile=").append(useCompoundFile).append(", ");
- sb.append("requireContiguousMerge=").append(requireContiguousMerge);
+ sb.append("noCFSRatio=").append(noCFSRatio);
sb.append("]");
return sb.toString();
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MergePolicy.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MergePolicy.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MergePolicy.java Sat Apr 2 15:47:12 2011
@@ -72,6 +72,7 @@ public abstract class MergePolicy implem
long mergeGen; // used by IndexWriter
boolean isExternal; // used by IndexWriter
int maxNumSegmentsOptimize; // used by IndexWriter
+ long estimatedMergeBytes; // used by IndexWriter
List<SegmentReader> readers; // used by IndexWriter
List<SegmentReader> readerClones; // used by IndexWriter
public final SegmentInfos segments;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java Sat Apr 2 15:47:12 2011
@@ -659,7 +659,6 @@ public final class SegmentInfo {
StringBuilder s = new StringBuilder();
s.append(name).append('(').append(version == null ? "?" : version).append(')').append(':');
-
char cfs = getUseCompoundFile() ? 'c' : 'C';
s.append(cfs);
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentReader.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentReader.java Sat Apr 2 15:47:12 2011
@@ -55,6 +55,9 @@ public class SegmentReader extends Index
AtomicInteger deletedDocsRef = null;
private boolean deletedDocsDirty = false;
private boolean normsDirty = false;
+
+ // TODO: we should move this tracking into SegmentInfo;
+ // this way SegmentInfo.toString shows pending deletes
private int pendingDeleteCount;
private boolean rollbackHasChanges = false;
@@ -803,8 +806,9 @@ public class SegmentReader extends Index
oldRef.decrementAndGet();
}
deletedDocsDirty = true;
- if (!deletedDocs.getAndSet(docNum))
+ if (!deletedDocs.getAndSet(docNum)) {
pendingDeleteCount++;
+ }
}
@Override
Added: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java?rev=1088051&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java (added)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/TieredMergePolicy.java Sat Apr 2 15:47:12 2011
@@ -0,0 +1,667 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Set;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Comparator;
+
+/**
+ * Merges segments of approximately equal size, subject to
+ * an allowed number of segments per tier. This is similar
+ * to {@link LogByteSizeMergePolicy}, except this merge
+ * policy is able to merge non-adjacent segments, and
+ * separates how many segments are merged at once ({@link
+ * #setMaxMergeAtOnce}) from how many segments are allowed
+ * per tier ({@link #setSegmentsPerTier}). This merge
+ * policy also does not over-merge (ie, cascade merges).
+ *
+ * <p>For normal merging, this policy first computes a
+ * "budget" of how many segments are allowed to be in the
+ * index. If the index is over-budget, then the policy
+ * sorts segments by decreasing size (pro-rating by percent
+ * deletes), and then finds the least-cost merge. Merge
+ * cost is measured by a combination of the "skew" of the
+ * merge (size of largest seg divided by smallest seg),
+ * total merge size and pct deletes reclaimed,
+ * so that merges with lower skew, smaller size
+ * and those reclaiming more deletes, are
+ * favored.
+ *
+ * <p>If a merge will produce a segment that's larger than
+ * {@link #setMaxMergedSegmentMB}, then the policy will
+ * merge fewer segments (down to 1 at once, if that one has
+ * deletions) to keep the segment size under budget.
+ *
+ * <p><b>NOTE</b>: this policy freely merges non-adjacent
+ * segments; if this is a problem, use {@link
+ * LogMergePolicy}.
+ *
+ * <p><b>NOTE</b>: This policy always merges by byte size
+ * of the segments, always pro-rates by percent deletes,
+ * and does not apply any maximum segment size during
+ * optimize (unlike {@link LogByteSizeMergePolicy}).
+ *
+ * @lucene.experimental
+ */
+
+// TODO
+// - we could try to take into account whether a large
+// merge is already running (under CMS) and then bias
+// ourselves towards picking smaller merges if so (or,
+// maybe CMS should do so)
+
+public class TieredMergePolicy extends MergePolicy {
+
+ private int maxMergeAtOnce = 10;
+ private long maxMergedSegmentBytes = 5*1024*1024*1024L;
+ private int maxMergeAtOnceExplicit = 30;
+
+ private long floorSegmentBytes = 2*1024*1024L;
+ private double segsPerTier = 10.0;
+ private double expungeDeletesPctAllowed = 10.0;
+ private boolean useCompoundFile = true;
+ private double noCFSRatio = 0.1;
+
+ /** Maximum number of segments to be merged at a time
+ * during "normal" merging. For explicit merging (eg,
+ * optimize or expungeDeletes was called), see {@link
+ * #setMaxMergeAtOnceExplicit}. Default is 10. */
+ public TieredMergePolicy setMaxMergeAtOnce(int v) {
+ if (v < 2) {
+ throw new IllegalArgumentException("maxMergeAtOnce must be > 1 (got " + v + ")");
+ }
+ maxMergeAtOnce = v;
+ return this;
+ }
+
+ /** @see #setMaxMergeAtOnce */
+ public int getMaxMergeAtOnce() {
+ return maxMergeAtOnce;
+ }
+
+ // TODO: should addIndexes do explicit merging, too? And,
+ // if user calls IW.maybeMerge "explicitly"
+
+ /** Maximum number of segments to be merged at a time,
+ * during optimize or expungeDeletes. Default is 30. */
+ public TieredMergePolicy setMaxMergeAtOnceExplicit(int v) {
+ if (v < 2) {
+ throw new IllegalArgumentException("maxMergeAtOnceExplicit must be > 1 (got " + v + ")");
+ }
+ maxMergeAtOnceExplicit = v;
+ return this;
+ }
+
+ /** @see #setMaxMergeAtOnceExplicit */
+ public int getMaxMergeAtOnceExplicit() {
+ return maxMergeAtOnceExplicit;
+ }
+
+ /** Maximum sized segment to produce during
+ * normal merging. This setting is approximate: the
+ * estimate of the merged segment size is made by summing
+ * sizes of to-be-merged segments (compensating for
+ * percent deleted docs). Default is 5 GB. */
+ public TieredMergePolicy setMaxMergedSegmentMB(double v) {
+ maxMergedSegmentBytes = (long) (v*1024*1024);
+ return this;
+ }
+
+ /** Returns the maximum merged segment size, in MB, that
+ * applies during normal merging.
+ * @see #setMaxMergedSegmentMB */
+ public double getMaxMergedSegmentMB() {
+ // Divide in floating point at every step so that a byte
+ // count that is not a multiple of 1024 is not truncated.
+ return maxMergedSegmentBytes/1024./1024.;
+ }
+
+ /** Segments smaller than this are "rounded up" to this
+ * size, ie treated as equal (floor) size for merge
+ * selection. This is to prevent frequent flushing of
+ * tiny segments from allowing a long tail in the index.
+ * Default is 2 MB.
+ * @param v floor size in MB; must be greater than 0.0
+ * @return this policy, so that setter calls can be chained
+ * @throws IllegalArgumentException if v is not greater than 0.0 */
+ public TieredMergePolicy setFloorSegmentMB(double v) {
+ if (v <= 0.0) {
+ // 0.0 itself is rejected, so the message must say "> 0.0"
+ throw new IllegalArgumentException("floorSegmentMB must be > 0.0 (got " + v + ")");
+ }
+ floorSegmentBytes = (long) (v*1024*1024);
+ return this;
+ }
+
+ /** Returns the floor segment size, in MB.
+ * @see #setFloorSegmentMB */
+ public double getFloorSegmentMB() {
+ // Bytes -> MB requires dividing by 1024 twice; the previous
+ // "/1024*1024." multiplied back and returned (roughly) bytes.
+ return floorSegmentBytes/1024./1024.;
+ }
+
+ /** When expungeDeletes is called, we only merge away a
+ * segment if its delete percentage is over this
+ * threshold. Default is 10%. */
+ public TieredMergePolicy setExpungeDeletesPctAllowed(double v) {
+ if (v < 0.0 || v > 100.0) {
+ throw new IllegalArgumentException("expungeDeletesPctAllowed must be between 0.0 and 100.0 inclusive (got " + v + ")");
+ }
+ expungeDeletesPctAllowed = v;
+ return this;
+ }
+
+ /** @see #setExpungeDeletesPctAllowed */
+ public double getExpungeDeletesPctAllowed() {
+ return expungeDeletesPctAllowed;
+ }
+
+ /** Sets the allowed number of segments per tier. Smaller
+ * values mean more merging but fewer segments.
+ *
+ * <p><b>NOTE</b>: this value should presumably be &gt;= the
+ * {@link #setMaxMergeAtOnce} setting, otherwise more merging
+ * than intended may be forced. (TODO confirm: this sentence was
+ * reconstructed from a truncated original.)
+ *
+ * <p>Default is 10.0.
+ * @throws IllegalArgumentException if v is less than 2.0 */
+ public TieredMergePolicy setSegmentsPerTier(double v) {
+ if (v < 2.0) {
+ throw new IllegalArgumentException("segmentsPerTier must be >= 2.0 (got " + v + ")");
+ }
+ segsPerTier = v;
+ return this;
+ }
+
+ /** @see #setSegmentsPerTier */
+ public double getSegmentsPerTier() {
+ return segsPerTier;
+ }
+
+ /** Sets whether compound file format should be used for
+ * newly flushed and newly merged segments. Default
+ * true. */
+ public TieredMergePolicy setUseCompoundFile(boolean useCompoundFile) {
+ this.useCompoundFile = useCompoundFile;
+ return this;
+ }
+
+ /** @see #setUseCompoundFile */
+ public boolean getUseCompoundFile() {
+ return useCompoundFile;
+ }
+
+ /** If a merged segment will be more than this percentage
+ * of the total size of the index, leave the segment as
+ * non-compound file even if compound file is enabled.
+ * Set to 1.0 to always use CFS regardless of merge
+ * size. Default is 0.1. */
+ public TieredMergePolicy setNoCFSRatio(double noCFSRatio) {
+ if (noCFSRatio < 0.0 || noCFSRatio > 1.0) {
+ throw new IllegalArgumentException("noCFSRatio must be 0.0 to 1.0 inclusive; got " + noCFSRatio);
+ }
+ this.noCFSRatio = noCFSRatio;
+ return this;
+ }
+
+ /** @see #setNoCFSRatio */
+ public double getNoCFSRatio() {
+ return noCFSRatio;
+ }
+
+ private class SegmentByteSizeDescending implements Comparator<SegmentInfo> {
+ public int compare(SegmentInfo o1, SegmentInfo o2) {
+ try {
+ final long sz1 = size(o1);
+ final long sz2 = size(o2);
+ if (sz1 > sz2) {
+ return -1;
+ } else if (sz2 > sz1) {
+ return 1;
+ } else {
+ return o1.name.compareTo(o2.name);
+ }
+ } catch (IOException ioe) {
+ throw new RuntimeException(ioe);
+ }
+ }
+ }
+
+ private final Comparator<SegmentInfo> segmentByteSizeDescending = new SegmentByteSizeDescending();
+
+ protected static abstract class MergeScore {
+ abstract double getScore();
+ abstract String getExplanation();
+ }
+
+ /** Selects merges for "normal" merging.
+ *
+ * <p>Segments are sorted by decreasing size (pro-rated by
+ * deletes). Segments at or above half of the max merged
+ * segment size are excluded from both the byte total and the
+ * eligible set. An allowed segment count is then computed
+ * from the remaining bytes, and while the number of eligible
+ * segments is at least that count, the lowest-scoring
+ * candidate merge (see {@link #score}) is added to the result.
+ *
+ * @param infos the current segments of the index
+ * @return the selected merges, or null if infos is empty or
+ * no merge was selected
+ * @throws IOException if computing a segment's size fails */
+ @Override
+ public MergeSpecification findMerges(SegmentInfos infos) throws IOException {
+ if (verbose()) {
+ message("findMerges: " + infos.size() + " segments");
+ }
+ if (infos.size() == 0) {
+ return null;
+ }
+ final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
+ final Collection<SegmentInfo> toBeMerged = new HashSet<SegmentInfo>();
+
+ final SegmentInfos infosSorted = new SegmentInfos();
+ infosSorted.addAll(infos);
+
+ Collections.sort(infosSorted, segmentByteSizeDescending);
+
+ // Compute total index bytes & print details about the index
+ long totIndexBytes = 0;
+ long minSegmentBytes = Long.MAX_VALUE;
+ for(SegmentInfo info : infosSorted) {
+ final long segBytes = size(info);
+ if (verbose()) {
+ String extra = merging.contains(info) ? " [merging]" : "";
+ if (segBytes >= maxMergedSegmentBytes/2.0) {
+ extra += " [skip: too large]";
+ } else if (segBytes < floorSegmentBytes) {
+ extra += " [floored]";
+ }
+ message(" seg=" + writer.get().segString(info) + " size=" + String.format("%.3f", segBytes/1024/1024.) + " MB" + extra);
+ }
+
+ minSegmentBytes = Math.min(segBytes, minSegmentBytes);
+ // Accum total byte size
+ totIndexBytes += segBytes;
+ }
+
+ // If we have too-large segments, grace them out
+ // of the maxSegmentCount:
+ int tooBigCount = 0;
+ while (tooBigCount < infosSorted.size() && size(infosSorted.info(tooBigCount)) >= maxMergedSegmentBytes/2.0) {
+ totIndexBytes -= size(infosSorted.get(tooBigCount));
+ tooBigCount++;
+ }
+
+ minSegmentBytes = floorSize(minSegmentBytes);
+
+ // Compute max allowed segs in the index
+ long levelSize = minSegmentBytes;
+ long bytesLeft = totIndexBytes;
+ double allowedSegCount = 0;
+ while(true) {
+ final double segCountLevel = bytesLeft / (double) levelSize;
+ if (segCountLevel < segsPerTier) {
+ allowedSegCount += Math.ceil(segCountLevel);
+ break;
+ }
+ allowedSegCount += segsPerTier;
+ bytesLeft -= segsPerTier * levelSize;
+ levelSize *= maxMergeAtOnce;
+ }
+ int allowedSegCountInt = (int) allowedSegCount;
+
+ MergeSpecification spec = null;
+
+ // Cycle to possibly select more than one merge:
+ while(true) {
+
+ long mergingBytes = 0;
+
+ // Gather eligible segments for merging, ie segments
+ // not already being merged and not already picked (by
+ // prior iteration of this loop) for merging:
+ final SegmentInfos eligible = new SegmentInfos();
+ for(int idx = tooBigCount; idx<infosSorted.size(); idx++) {
+ final SegmentInfo info = infosSorted.get(idx);
+ if (merging.contains(info)) {
+ mergingBytes += info.sizeInBytes(true);
+ } else if (!toBeMerged.contains(info)) {
+ eligible.add(info);
+ }
+ }
+
+ final boolean maxMergeIsRunning = mergingBytes >= maxMergedSegmentBytes;
+
+ // Only build the log string when it will actually be printed:
+ if (verbose()) {
+ message(" allowedSegmentCount=" + allowedSegCountInt + " vs count=" + infosSorted.size() + " (eligible count=" + eligible.size() + ") tooBigCount=" + tooBigCount);
+ }
+
+ if (eligible.size() == 0) {
+ return spec;
+ }
+
+ if (eligible.size() >= allowedSegCountInt) {
+
+ // OK we are over budget -- find best merge!
+ MergeScore bestScore = null;
+ SegmentInfos best = null;
+ boolean bestTooLarge = false;
+ long bestMergeBytes = 0;
+
+ // Consider all merge starts:
+ for(int startIdx = 0;startIdx <= eligible.size()-maxMergeAtOnce; startIdx++) {
+
+ long totAfterMergeBytes = 0;
+
+ final SegmentInfos candidate = new SegmentInfos();
+ boolean hitTooLarge = false;
+ for(int idx = startIdx;idx<eligible.size() && candidate.size() < maxMergeAtOnce;idx++) {
+ final SegmentInfo info = eligible.info(idx);
+ final long segBytes = size(info);
+
+ if (totAfterMergeBytes + segBytes > maxMergedSegmentBytes) {
+ hitTooLarge = true;
+ // NOTE: we continue, so that we can try
+ // "packing" smaller segments into this merge
+ // to see if we can get closer to the max
+ // size; this in general is not perfect since
+ // this is really "bin packing" and we'd have
+ // to try different permutations.
+ continue;
+ }
+ candidate.add(info);
+ totAfterMergeBytes += segBytes;
+ }
+
+ final MergeScore score = score(candidate, hitTooLarge, mergingBytes);
+ // This runs once per candidate; skip the segString/format
+ // work entirely unless verbose logging is enabled:
+ if (verbose()) {
+ message(" maybe=" + writer.get().segString(candidate) + " score=" + score.getScore() + " " + score.getExplanation() + " tooLarge=" + hitTooLarge + " size=" + String.format("%.3f MB", totAfterMergeBytes/1024./1024.));
+ }
+
+ // If we are already running a max sized merge
+ // (maxMergeIsRunning), don't allow another max
+ // sized merge to kick off:
+ if ((bestScore == null || score.getScore() < bestScore.getScore()) && (!hitTooLarge || !maxMergeIsRunning)) {
+ best = candidate;
+ bestScore = score;
+ bestTooLarge = hitTooLarge;
+ bestMergeBytes = totAfterMergeBytes;
+ }
+ }
+
+ if (best != null) {
+ if (spec == null) {
+ spec = new MergeSpecification();
+ }
+ final OneMerge merge = new OneMerge(best);
+ spec.add(merge);
+ for(SegmentInfo info : merge.segments) {
+ toBeMerged.add(info);
+ }
+
+ if (verbose()) {
+ message(" add merge=" + writer.get().segString(merge.segments) + " size=" + String.format("%.3f MB", bestMergeBytes/1024./1024.) + " score=" + String.format("%.3f", bestScore.getScore()) + " " + bestScore.getExplanation() + (bestTooLarge ? " [max merge]" : ""));
+ }
+ } else {
+ return spec;
+ }
+ } else {
+ return spec;
+ }
+ }
+ }
+
+ /** Expert: scores one merge; subclasses can override. */
+ protected MergeScore score(SegmentInfos candidate, boolean hitTooLarge, long mergingBytes) throws IOException {
+ long totBeforeMergeBytes = 0;
+ long totAfterMergeBytes = 0;
+ long totAfterMergeBytesFloored = 0;
+ for(SegmentInfo info : candidate) {
+ final long segBytes = size(info);
+ totAfterMergeBytes += segBytes;
+ totAfterMergeBytesFloored += floorSize(segBytes);
+ totBeforeMergeBytes += info.sizeInBytes(true);
+ }
+
+ // Measure "skew" of the merge, which can range
+ // from 1.0/numSegsBeingMerged (good) to 1.0
+ // (poor):
+ final double skew;
+ if (hitTooLarge) {
+ // Pretend the merge has perfect skew; skew doesn't
+ // matter in this case because this merge will not
+ // "cascade" and so it cannot lead to N^2 merge cost
+ // over time:
+ skew = 1.0/maxMergeAtOnce;
+ } else {
+ skew = ((double) floorSize(size(candidate.info(0))))/totAfterMergeBytesFloored;
+ }
+
+ // Strongly favor merges with less skew (smaller
+ // mergeScore is better):
+ double mergeScore = skew;
+
+ // Gently favor smaller merges over bigger ones. We
+ // don't want to make this exponent too large else we
+ // can end up doing poor merges of small segments in
+ // order to avoid the large merges:
+ mergeScore *= Math.pow(totAfterMergeBytes, 0.05);
+
+ // Strongly favor merges that reclaim deletes:
+ final double nonDelRatio = ((double) totAfterMergeBytes)/totBeforeMergeBytes;
+ mergeScore *= nonDelRatio;
+
+ final double finalMergeScore = mergeScore;
+
+ return new MergeScore() {
+
+ @Override
+ public double getScore() {
+ return finalMergeScore;
+ }
+
+ @Override
+ public String getExplanation() {
+ return "skew=" + String.format("%.3f", skew) + " nonDelRatio=" + String.format("%.3f", nonDelRatio);
+ }
+ };
+ }
+
+ @Override
+ public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxSegmentCount, Set<SegmentInfo> segmentsToOptimize) throws IOException {
+ if (verbose()) {
+ message("findMergesForOptimize maxSegmentCount=" + maxSegmentCount + " infos=" + writer.get().segString(infos) + " segmentsToOptimize=" + segmentsToOptimize);
+ }
+ SegmentInfos eligible = new SegmentInfos();
+ boolean optimizeMergeRunning = false;
+ final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
+ for(SegmentInfo info : infos) {
+ if (segmentsToOptimize.contains(info)) {
+ if (!merging.contains(info)) {
+ eligible.add(info);
+ } else {
+ optimizeMergeRunning = true;
+ }
+ }
+ }
+
+ if (eligible.size() == 0) {
+ return null;
+ }
+
+ if ((maxSegmentCount > 1 && eligible.size() <= maxSegmentCount) ||
+ (maxSegmentCount == 1 && eligible.size() == 1 && isOptimized(eligible.get(0)))) {
+ if (verbose()) {
+ message("already optimized");
+ }
+ return null;
+ }
+
+ Collections.sort(eligible, segmentByteSizeDescending);
+
+ if (verbose()) {
+ message("eligible=" + eligible);
+ message("optimizeMergeRunning=" + optimizeMergeRunning);
+ }
+
+ int end = eligible.size();
+
+ MergeSpecification spec = null;
+
+ // Do full merges, first, backwards:
+ while(end >= maxMergeAtOnceExplicit + maxSegmentCount - 1) {
+ if (spec == null) {
+ spec = new MergeSpecification();
+ }
+ final OneMerge merge = new OneMerge(eligible.range(end-maxMergeAtOnceExplicit, end));
+ if (verbose()) {
+ message("add merge=" + writer.get().segString(merge.segments));
+ }
+ spec.add(merge);
+ end -= maxMergeAtOnceExplicit;
+ }
+
+ if (spec == null && !optimizeMergeRunning) {
+ // Do final merge
+ final int numToMerge = end - maxSegmentCount + 1;
+ final OneMerge merge = new OneMerge(eligible.range(end-numToMerge, end));
+ if (verbose()) {
+ message("add final merge=" + merge.segString(writer.get().getDirectory()));
+ }
+ spec = new MergeSpecification();
+ spec.add(merge);
+ }
+
+ return spec;
+ }
+
+ @Override
+ public MergeSpecification findMergesToExpungeDeletes(SegmentInfos infos)
+ throws CorruptIndexException, IOException {
+ if (verbose()) {
+ message("findMergesToExpungeDeletes infos=" + writer.get().segString(infos) + " expungeDeletesPctAllowed=" + expungeDeletesPctAllowed);
+ }
+ final SegmentInfos eligible = new SegmentInfos();
+ final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
+ for(SegmentInfo info : infos) {
+ double pctDeletes = 100.*((double) writer.get().numDeletedDocs(info))/info.docCount;
+ if (pctDeletes > expungeDeletesPctAllowed && !merging.contains(info)) {
+ eligible.add(info);
+ }
+ }
+
+ if (eligible.size() == 0) {
+ return null;
+ }
+
+ Collections.sort(eligible, segmentByteSizeDescending);
+
+ if (verbose()) {
+ message("eligible=" + eligible);
+ }
+
+ int start = 0;
+ MergeSpecification spec = null;
+
+ while(start < eligible.size()) {
+ long totAfterMergeBytes = 0;
+ int upto = start;
+ boolean done = false;
+ while(upto < start + maxMergeAtOnceExplicit) {
+ if (upto == eligible.size()) {
+ done = true;
+ break;
+ }
+ final SegmentInfo info = eligible.get(upto);
+ final long segBytes = size(info);
+ if (totAfterMergeBytes + segBytes > maxMergedSegmentBytes) {
+ // TODO: we could be smarter here, eg cherry
+ // picking smaller merges that'd sum up to just
+ // around the max size
+ break;
+ }
+ totAfterMergeBytes += segBytes;
+ upto++;
+ }
+
+ if (upto == start) {
+ // Single segment is too big; grace it
+ start++;
+ continue;
+ }
+
+ if (spec == null) {
+ spec = new MergeSpecification();
+ }
+
+ final OneMerge merge = new OneMerge(eligible.range(start, upto));
+ if (verbose()) {
+ message("add merge=" + writer.get().segString(merge.segments));
+ }
+ spec.add(merge);
+ start = upto;
+ if (done) {
+ break;
+ }
+ }
+
+ return spec;
+ }
+
+ /** Decides whether the merged segment should use the
+ * compound file format. Returns false when compound files
+ * are disabled and true when noCFSRatio is exactly 1.0;
+ * otherwise returns true only if the merged segment's
+ * pro-rated size is at most noCFSRatio times the summed
+ * pro-rated size of all segments in infos. */
+ @Override
+ public boolean useCompoundFile(SegmentInfos infos, SegmentInfo mergedInfo) throws IOException {
+ final boolean doCFS;
+
+ if (!useCompoundFile) {
+ doCFS = false;
+ } else if (noCFSRatio == 1.0) {
+ // Ratio of 1.0 means "always CFS"; skip the size computation
+ doCFS = true;
+ } else {
+ // Sum the (delete pro-rated) sizes of every segment passed in
+ long totalSize = 0;
+ for (SegmentInfo info : infos)
+ totalSize += size(info);
+
+ doCFS = size(mergedInfo) <= noCFSRatio * totalSize;
+ }
+ return doCFS;
+ }
+
+ @Override
+ public void close() {
+ }
+
+ /** Returns true if the segment has no deleted docs, has no
+ * separate norms, lives in the writer's directory, and
+ * either its compound-file setting matches this policy's
+ * useCompoundFile or noCFSRatio is below 1.0. */
+ private boolean isOptimized(SegmentInfo info)
+ throws IOException {
+ IndexWriter w = writer.get();
+ assert w != null;
+ boolean hasDeletions = w.numDeletedDocs(info) > 0;
+ return !hasDeletions &&
+ !info.hasSeparateNorms() &&
+ info.dir == w.getDirectory() &&
+ (info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0);
+ }
+
+ // Segment size in bytes, pro-rated by % deleted: the value
+ // of info.sizeInBytes(true) is scaled by the fraction of
+ // the segment's docs that are not deleted.
+ private long size(SegmentInfo info) throws IOException {
+ final long byteSize = info.sizeInBytes(true);
+ final int delCount = writer.get().numDeletedDocs(info);
+ // Treat a segment with no docs as having no deletes, which
+ // also avoids dividing by zero:
+ final double delRatio = (info.docCount <= 0 ? 0.0f : ((double)delCount / (double)info.docCount));
+ assert delRatio <= 1.0;
+ return (long) (byteSize * (1.0-delRatio));
+ }
+
+ private long floorSize(long bytes) {
+ return Math.max(floorSegmentBytes, bytes);
+ }
+
+ private boolean verbose() {
+ IndexWriter w = writer.get();
+ return w != null && w.verbose();
+ }
+
+ private void message(String message) {
+ if (verbose()) {
+ writer.get().message("TMP: " + message);
+ }
+ }
+
+ /** Returns a one-line summary of this policy's settings in
+ * the form <code>[ClassName: name=value, ...]</code>. */
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder("[" + getClass().getSimpleName() + ": ");
+ sb.append("maxMergeAtOnce=").append(maxMergeAtOnce).append(", ");
+ sb.append("maxMergeAtOnceExplicit=").append(maxMergeAtOnceExplicit).append(", ");
+ sb.append("maxMergedSegmentMB=").append(maxMergedSegmentBytes/1024/1024.).append(", ");
+ sb.append("floorSegmentMB=").append(floorSegmentBytes/1024/1024.).append(", ");
+ sb.append("expungeDeletesPctAllowed=").append(expungeDeletesPctAllowed).append(", ");
+ sb.append("segmentsPerTier=").append(segsPerTier).append(", ");
+ sb.append("useCompoundFile=").append(useCompoundFile).append(", ");
+ sb.append("noCFSRatio=").append(noCFSRatio);
+ // Close the bracket opened at the start of the string
+ sb.append("]");
+ return sb.toString();
+ }
+}
\ No newline at end of file
Modified: lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java Sat Apr 2 15:47:12 2011
@@ -128,7 +128,7 @@ public class LineFileDocs implements Clo
body = new Field("body", "", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
doc.add(body);
- id = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
+ id = new Field("docid", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
doc.add(id);
date = new Field("date", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
Modified: lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java Sat Apr 2 15:47:12 2011
@@ -768,9 +768,11 @@ public abstract class LuceneTestCase ext
}
if (r.nextBoolean()) {
- c.setMergePolicy(new MockRandomMergePolicy(r));
- } else {
+ c.setMergePolicy(newTieredMergePolicy());
+ } else if (r.nextBoolean()) {
c.setMergePolicy(newLogMergePolicy());
+ } else {
+ c.setMergePolicy(new MockRandomMergePolicy(r));
}
c.setReaderPooling(r.nextBoolean());
@@ -782,6 +784,10 @@ public abstract class LuceneTestCase ext
return newLogMergePolicy(random);
}
+ public static TieredMergePolicy newTieredMergePolicy() {
+ return newTieredMergePolicy(random);
+ }
+
public static LogMergePolicy newLogMergePolicy(Random r) {
LogMergePolicy logmp = r.nextBoolean() ? new LogDocMergePolicy() : new LogByteSizeMergePolicy();
logmp.setUseCompoundFile(r.nextBoolean());
@@ -794,17 +800,22 @@ public abstract class LuceneTestCase ext
return logmp;
}
- public static LogMergePolicy newInOrderLogMergePolicy() {
- LogMergePolicy logmp = newLogMergePolicy();
- logmp.setRequireContiguousMerge(true);
- return logmp;
- }
-
- public static LogMergePolicy newInOrderLogMergePolicy(int mergeFactor) {
- LogMergePolicy logmp = newLogMergePolicy();
- logmp.setMergeFactor(mergeFactor);
- logmp.setRequireContiguousMerge(true);
- return logmp;
+ public static TieredMergePolicy newTieredMergePolicy(Random r) {
+ TieredMergePolicy tmp = new TieredMergePolicy();
+ if (r.nextInt(3) == 2) {
+ tmp.setMaxMergeAtOnce(2);
+ tmp.setMaxMergeAtOnceExplicit(2);
+ } else {
+ tmp.setMaxMergeAtOnce(_TestUtil.nextInt(r, 2, 20));
+ tmp.setMaxMergeAtOnceExplicit(_TestUtil.nextInt(r, 2, 30));
+ }
+ tmp.setMaxMergedSegmentMB(0.2 + r.nextDouble() * 2.0);
+ tmp.setFloorSegmentMB(0.2 + r.nextDouble() * 2.0);
+ tmp.setExpungeDeletesPctAllowed(0.0 + r.nextDouble() * 30.0);
+ tmp.setSegmentsPerTier(_TestUtil.nextInt(r, 2, 20));
+ tmp.setUseCompoundFile(r.nextBoolean());
+ tmp.setNoCFSRatio(0.1 + r.nextDouble()*0.8);
+ return tmp;
}
public static LogMergePolicy newLogMergePolicy(boolean useCFS) {
Modified: lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java Sat Apr 2 15:47:12 2011
@@ -43,10 +43,13 @@ import org.apache.lucene.index.Concurren
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MergeScheduler;
+import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.store.Directory;
+import org.junit.Assert;
public class _TestUtil {
@@ -307,9 +310,14 @@ public class _TestUtil {
* count lowish */
public static void reduceOpenFiles(IndexWriter w) {
// keep number of open files lowish
- LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy();
- lmp.setMergeFactor(Math.min(5, lmp.getMergeFactor()));
-
+ MergePolicy mp = w.getConfig().getMergePolicy();
+ if (mp instanceof LogMergePolicy) {
+ LogMergePolicy lmp = (LogMergePolicy) mp;
+ lmp.setMergeFactor(Math.min(5, lmp.getMergeFactor()));
+ } else if (mp instanceof TieredMergePolicy) {
+ TieredMergePolicy tmp = (TieredMergePolicy) mp;
+ tmp.setMaxMergeAtOnce(Math.min(5, tmp.getMaxMergeAtOnce()));
+ }
MergeScheduler ms = w.getConfig().getMergeScheduler();
if (ms instanceof ConcurrentMergeScheduler) {
((ConcurrentMergeScheduler) ms).setMaxThreadCount(2);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java Sat Apr 2 15:47:12 2011
@@ -1073,8 +1073,9 @@ public class TestAddIndexes extends Luce
IndexReader[] readers = new IndexReader[] { IndexReader.open(dirs[0]), IndexReader.open(dirs[1]) };
Directory dir = new RAMDirectory();
- IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer());
+ IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy());
LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
+ lmp.setUseCompoundFile(true);
lmp.setNoCFSRatio(1.0); // Force creation of CFS
IndexWriter w3 = new IndexWriter(dir, conf);
w3.addIndexes(readers);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java Sat Apr 2 15:47:12 2011
@@ -129,7 +129,7 @@ public class TestAtomicUpdate extends Lu
IndexWriterConfig conf = new IndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer())
.setMaxBufferedDocs(7);
- ((LogMergePolicy) conf.getMergePolicy()).setMergeFactor(3);
+ ((TieredMergePolicy) conf.getMergePolicy()).setMaxMergeAtOnce(3);
IndexWriter writer = new MockIndexWriter(directory, conf);
writer.setInfoStream(VERBOSE ? System.out : null);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java Sat Apr 2 15:47:12 2011
@@ -619,7 +619,7 @@ public class TestDeletionPolicy extends
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer())
- .setOpenMode(OpenMode.CREATE).setIndexDeletionPolicy(policy).setMergePolicy(newInOrderLogMergePolicy());
+ .setOpenMode(OpenMode.CREATE).setIndexDeletionPolicy(policy).setMergePolicy(newLogMergePolicy());
MergePolicy mp = conf.getMergePolicy();
if (mp instanceof LogMergePolicy) {
((LogMergePolicy) mp).setUseCompoundFile(useCompoundFile);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocTermOrds.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocTermOrds.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocTermOrds.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocTermOrds.java Sat Apr 2 15:47:12 2011
@@ -60,7 +60,7 @@ public class TestDocTermOrds extends Luc
public void testSimple() throws Exception {
Directory dir = newDirectory();
- final RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+ final RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
Document doc = new Document();
Field field = newField("field", "", Field.Index.ANALYZED);
doc.add(field);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java Sat Apr 2 15:47:12 2011
@@ -116,7 +116,7 @@ public class TestDocsAndPositions extend
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(
- MockTokenizer.WHITESPACE, true, usePayload)).setMergePolicy(newInOrderLogMergePolicy()));
+ MockTokenizer.WHITESPACE, true, usePayload)).setMergePolicy(newLogMergePolicy()));
int numDocs = 131;
int max = 1051;
int term = random.nextInt(max);
@@ -197,7 +197,7 @@ public class TestDocsAndPositions extend
Directory dir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(
- MockTokenizer.WHITESPACE, true, usePayload)).setMergePolicy(newInOrderLogMergePolicy()));
+ MockTokenizer.WHITESPACE, true, usePayload)).setMergePolicy(newLogMergePolicy()));
int numDocs = 499;
int max = 15678;
int term = random.nextInt(max);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReader.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReader.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReader.java Sat Apr 2 15:47:12 2011
@@ -371,7 +371,7 @@ public class TestIndexReader extends Luc
Directory dir = newDirectory();
byte[] bin = new byte[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
- IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+ IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
for (int i = 0; i < 10; i++) {
addDoc(writer, "document number " + (i + 1));
@@ -380,7 +380,7 @@ public class TestIndexReader extends Luc
addDocumentWithTermVectorFields(writer);
}
writer.close();
- writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND).setMergePolicy(newInOrderLogMergePolicy()));
+ writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy()));
Document doc = new Document();
doc.add(new Field("bin1", bin));
doc.add(new Field("junk", "junk text", Field.Store.NO, Field.Index.ANALYZED));
@@ -417,7 +417,7 @@ public class TestIndexReader extends Luc
// force optimize
- writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND).setMergePolicy(newInOrderLogMergePolicy()));
+ writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy()));
writer.optimize();
writer.close();
reader = IndexReader.open(dir, false);
@@ -1163,7 +1163,7 @@ public class TestIndexReader extends Luc
public void testMultiReaderDeletes() throws Exception {
Directory dir = newDirectory();
- RandomIndexWriter w= new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
+ RandomIndexWriter w= new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newLogMergePolicy()));
Document doc = new Document();
doc.add(newField("f", "doctor", Field.Store.NO, Field.Index.NOT_ANALYZED));
w.addDocument(doc);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java Sat Apr 2 15:47:12 2011
@@ -174,7 +174,7 @@ public class TestIndexReaderReopen exten
private void doTestReopenWithCommit (Random random, Directory dir, boolean withReopen) throws IOException {
IndexWriter iwriter = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer()).setOpenMode(
- OpenMode.CREATE).setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(newInOrderLogMergePolicy()));
+ OpenMode.CREATE).setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(newLogMergePolicy()));
iwriter.commit();
IndexReader reader = IndexReader.open(dir, false);
try {
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java Sat Apr 2 15:47:12 2011
@@ -68,7 +68,7 @@ public class TestIndexWriterConfig exten
assertNull(conf.getMergedSegmentWarmer());
assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, conf.getMaxThreadStates());
assertEquals(IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR, conf.getReaderTermsIndexDivisor());
- assertEquals(LogByteSizeMergePolicy.class, conf.getMergePolicy().getClass());
+ assertEquals(TieredMergePolicy.class, conf.getMergePolicy().getClass());
// Sanity check - validate that all getters are covered.
Set<String> getters = new HashSet<String>();
@@ -246,7 +246,7 @@ public class TestIndexWriterConfig exten
assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, conf.getMaxThreadStates());
// Test MergePolicy
- assertEquals(LogByteSizeMergePolicy.class, conf.getMergePolicy().getClass());
+ assertEquals(TieredMergePolicy.class, conf.getMergePolicy().getClass());
conf.setMergePolicy(new LogDocMergePolicy());
assertEquals(LogDocMergePolicy.class, conf.getMergePolicy().getClass());
conf.setMergePolicy(null);
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java Sat Apr 2 15:47:12 2011
@@ -104,7 +104,7 @@ public class TestIndexWriterMergePolicy
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).
setMaxBufferedDocs(10).
- setMergePolicy(newInOrderLogMergePolicy())
+ setMergePolicy(newLogMergePolicy())
);
for (int i = 0; i < 250; i++) {
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterMerging.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterMerging.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterMerging.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterMerging.java Sat Apr 2 15:47:12 2011
@@ -58,7 +58,7 @@ public class TestIndexWriterMerging exte
IndexWriter writer = new IndexWriter(
merged,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).
- setMergePolicy(newInOrderLogMergePolicy(2))
+ setMergePolicy(newLogMergePolicy(2))
);
writer.setInfoStream(VERBOSE ? System.out : null);
writer.addIndexes(indexA, indexB);
@@ -101,7 +101,7 @@ public class TestIndexWriterMerging exte
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).
setOpenMode(OpenMode.CREATE).
setMaxBufferedDocs(2).
- setMergePolicy(newInOrderLogMergePolicy(2))
+ setMergePolicy(newLogMergePolicy(2))
);
for (int i = start; i < (start + numDocs); i++)
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java Sat Apr 2 15:47:12 2011
@@ -46,7 +46,7 @@ import org.apache.lucene.util.ThreadInte
import java.util.concurrent.atomic.AtomicInteger;
public class TestIndexWriterReader extends LuceneTestCase {
- static PrintStream infoStream;
+ static PrintStream infoStream = VERBOSE ? System.out : null;
public static int count(Term t, IndexReader r) throws IOException {
int count = 0;
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java?rev=1088051&r1=1088050&r2=1088051&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java Sat Apr 2 15:47:12 2011
@@ -47,7 +47,7 @@ public class TestMaxTermFrequency extend
super.setUp();
dir = newDirectory();
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
- new MockAnalyzer(MockTokenizer.SIMPLE, true)).setMergePolicy(newInOrderLogMergePolicy());
+ new MockAnalyzer(MockTokenizer.SIMPLE, true)).setMergePolicy(newLogMergePolicy());
config.setSimilarityProvider(new DefaultSimilarityProvider() {
@Override
public Similarity get(String field) {