You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2007/10/05 22:51:56 UTC
svn commit: r582384 - in /lucene/java/trunk: ./
src/java/org/apache/lucene/index/ src/test/org/apache/lucene/index/
Author: mikemccand
Date: Fri Oct 5 13:51:53 2007
New Revision: 582384
URL: http://svn.apache.org/viewvc?rev=582384&view=rev
Log:
LUCENE-1013: fix IndexWriter.setMaxMergeDocs(N) to work out-of-the-box again
Modified:
lucene/java/trunk/CHANGES.txt
lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
lucene/java/trunk/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java
lucene/java/trunk/src/java/org/apache/lucene/index/LogDocMergePolicy.java
lucene/java/trunk/src/java/org/apache/lucene/index/LogMergePolicy.java
lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java
Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=582384&r1=582383&r2=582384&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Fri Oct 5 13:51:53 2007
@@ -381,6 +381,12 @@
23. LUCENE-913: Two consecutive score() calls return different
scores for Boolean Queries. (Michael Busch, Doron Cohen)
+24. LUCENE-1013: Fix IndexWriter.setMaxMergeDocs to work "out of the
+ box", again, by moving set/getMaxMergeDocs up from
+ LogDocMergePolicy into LogMergePolicy. This fixes the API
+ breakage (non backwards compatible change) caused by LUCENE-994.
+ (Yonik Seeley via Mike McCandless)
+
New features
1. LUCENE-759: Added two n-gram-producing TokenFilters.
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java?rev=582384&r1=582383&r2=582384&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java Fri Oct 5 13:51:53 2007
@@ -327,13 +327,6 @@
throw new IllegalArgumentException("this method can only be called when the merge policy is the default LogMergePolicy");
}
- private LogDocMergePolicy getLogDocMergePolicy() {
- if (mergePolicy instanceof LogDocMergePolicy)
- return (LogDocMergePolicy) mergePolicy;
- else
- throw new IllegalArgumentException("this method can only be called when the merge policy is LogDocMergePolicy");
- }
-
/** <p>Get the current setting of whether newly flushed
* segments will use the compound file format. Note that
* this just returns the value previously set with
@@ -794,7 +787,7 @@
* Otherwise an IllegalArgumentException is thrown.</p>
*/
public void setMaxMergeDocs(int maxMergeDocs) {
- getLogDocMergePolicy().setMaxMergeDocs(maxMergeDocs);
+ getLogMergePolicy().setMaxMergeDocs(maxMergeDocs);
}
/**
@@ -809,7 +802,7 @@
* @see #setMaxMergeDocs
*/
public int getMaxMergeDocs() {
- return getLogDocMergePolicy().getMaxMergeDocs();
+ return getLogMergePolicy().getMaxMergeDocs();
}
/**
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java?rev=582384&r1=582383&r2=582384&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java Fri Oct 5 13:51:53 2007
@@ -41,7 +41,9 @@
/** Sets the maximum size for a segment to be merged.
* When a segment is this size or larger it will never be
- * merged. */
+ * merged. Note that {@link #setMaxMergeDocs} is also
+ * used to check whether a segment is too large for
+ * merging (reaching either limit excludes the segment). */
public void setMaxMergeMB(double mb) {
maxMergeSize = (long) (mb*1024*1024);
}
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/LogDocMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/LogDocMergePolicy.java?rev=582384&r1=582383&r2=582384&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/LogDocMergePolicy.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/LogDocMergePolicy.java Fri Oct 5 13:51:53 2007
@@ -26,30 +26,16 @@
/** Default minimum segment size. @see setMinMergeDocs */
public static final int DEFAULT_MIN_MERGE_DOCS = 1000;
- /** Default maximum segment size. A segment of this size
- * or larger will never be merged. @see setMaxMergeDocs */
- public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;
-
public LogDocMergePolicy() {
super();
minMergeSize = DEFAULT_MIN_MERGE_DOCS;
- maxMergeSize = DEFAULT_MAX_MERGE_DOCS;
+
+ // maxMergeSize is never used by LogDocMergePolicy; set
+ // it to Long.MAX_VALUE to disable it
+ maxMergeSize = Long.MAX_VALUE;
}
protected long size(SegmentInfo info) {
return info.docCount;
- }
-
- /** Sets the maximum size for a segment to be merged.
- * When a segment is this size or larger it will never be
- * merged. */
- public void setMaxMergeDocs(int maxMergeDocs) {
- maxMergeSize = maxMergeDocs;
- }
-
- /** Get the maximum size for a segment to be merged.
- * @see #setMaxMergeDocs */
- public int getMaxMergeDocs() {
- return (int) maxMergeSize;
}
/** Sets the minimum size for the lowest level segments.
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=582384&r1=582383&r2=582384&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/LogMergePolicy.java Fri Oct 5 13:51:53 2007
@@ -49,10 +49,15 @@
* merged at a time */
public static final int DEFAULT_MERGE_FACTOR = 10;
+ /** Default maximum segment size. A segment of this size
+ * or larger will never be merged. @see #setMaxMergeDocs */
+ public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;
+
private int mergeFactor = DEFAULT_MERGE_FACTOR;
long minMergeSize;
long maxMergeSize;
+ int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
private boolean useCompoundFile = true;
private boolean useCompoundDocStore = true;
@@ -219,6 +224,9 @@
long size = size(info);
// Refuse to import a segment that's too large
+ if (info.docCount > maxMergeDocs && info.dir != directory)
+ throw new IllegalArgumentException("Segment is too large (" + info.docCount + " docs vs max docs " + maxMergeDocs + ")");
+
if (size >= maxMergeSize && info.dir != directory)
throw new IllegalArgumentException("Segment is too large (" + size + " vs max size " + maxMergeSize + ")");
@@ -281,8 +289,10 @@
int end = start + mergeFactor;
while(end <= 1+upto) {
boolean anyTooLarge = false;
- for(int i=start;i<end;i++)
- anyTooLarge |= size(infos.info(i)) >= maxMergeSize;
+ for(int i=start;i<end;i++) {
+ final SegmentInfo info = infos.info(i);
+ anyTooLarge |= (size(info) >= maxMergeSize || info.docCount >= maxMergeDocs);
+ }
if (!anyTooLarge) {
if (spec == null)
@@ -298,4 +308,18 @@
return spec;
}
+
+ /** Sets the maximum docs for a segment to be merged.
+ * When a segment has this many docs or more it will never be
+ * merged. */
+ public void setMaxMergeDocs(int maxMergeDocs) {
+ this.maxMergeDocs = maxMergeDocs;
+ }
+
+ /** Get the maximum docs for a segment to be merged.
+ * @see #setMaxMergeDocs */
+ public int getMaxMergeDocs() {
+ return maxMergeDocs;
+ }
+
}
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=582384&r1=582383&r2=582384&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java Fri Oct 5 13:51:53 2007
@@ -1583,4 +1583,39 @@
iw.close();
dir.close();
}
+
+ // Just intercepts all merges & verifies that we are never
+ // merging a segment with >= 20 (maxMergeDocs) docs
+ private class MyMergeScheduler implements MergeScheduler {
+ synchronized public void merge(IndexWriter writer)
+ throws CorruptIndexException, IOException {
+
+ while(true) {
+ MergePolicy.OneMerge merge = writer.getNextMerge();
+ if (merge == null)
+ break;
+ for(int i=0;i<merge.segments.size();i++)
+ assert merge.segments.info(i).docCount < 20;
+ writer.merge(merge);
+ }
+ }
+
+ public void close() {}
+ }
+
+ // LUCENE-1013
+ public void testSetMaxMergeDocs() throws IOException {
+ MockRAMDirectory dir = new MockRAMDirectory();
+ IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true);
+ iw.setMergeScheduler(new MyMergeScheduler());
+ iw.setMaxMergeDocs(20);
+ iw.setMaxBufferedDocs(2);
+ iw.setMergeFactor(2);
+ Document document = new Document();
+ document.add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.TOKENIZED,
+ Field.TermVector.YES));
+ for(int i=0;i<177;i++)
+ iw.addDocument(document);
+ iw.close();
+ }
}