You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2007/10/05 22:51:56 UTC

svn commit: r582384 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/index/ src/test/org/apache/lucene/index/

Author: mikemccand
Date: Fri Oct  5 13:51:53 2007
New Revision: 582384

URL: http://svn.apache.org/viewvc?rev=582384&view=rev
Log:
LUCENE-1013: fix IndexWriter.setMaxMergeDocs(N) to work out-of-the-box again

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
    lucene/java/trunk/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java
    lucene/java/trunk/src/java/org/apache/lucene/index/LogDocMergePolicy.java
    lucene/java/trunk/src/java/org/apache/lucene/index/LogMergePolicy.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=582384&r1=582383&r2=582384&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Fri Oct  5 13:51:53 2007
@@ -381,6 +381,12 @@
 23. LUCENE-913: Two consecutive score() calls return different 
     scores for Boolean Queries. (Michael Busch, Doron Cohen)
 
+24. LUCENE-1013: Fix IndexWriter.setMaxMergeDocs to work "out of the
+    box", again, by moving set/getMaxMergeDocs up from
+    LogDocMergePolicy into LogMergePolicy.  This fixes the API
+    breakage (a non-backwards-compatible change) caused by LUCENE-994.
+    (Yonik Seeley via Mike McCandless)
+
 New features
 
  1. LUCENE-759: Added two n-gram-producing TokenFilters.

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java?rev=582384&r1=582383&r2=582384&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java Fri Oct  5 13:51:53 2007
@@ -327,13 +327,6 @@
       throw new IllegalArgumentException("this method can only be called when the merge policy is the default LogMergePolicy");
   }
 
-  private LogDocMergePolicy getLogDocMergePolicy() {
-    if (mergePolicy instanceof LogDocMergePolicy)
-      return (LogDocMergePolicy) mergePolicy;
-    else
-      throw new IllegalArgumentException("this method can only be called when the merge policy is LogDocMergePolicy");
-  }
-
   /** <p>Get the current setting of whether newly flushed
    *  segments will use the compound file format.  Note that
    *  this just returns the value previously set with
@@ -794,7 +787,7 @@
    * Otherwise an IllegalArgumentException is thrown.</p>
    */
   public void setMaxMergeDocs(int maxMergeDocs) {
-    getLogDocMergePolicy().setMaxMergeDocs(maxMergeDocs);
+    getLogMergePolicy().setMaxMergeDocs(maxMergeDocs);
   }
 
    /**
@@ -809,7 +802,7 @@
    * @see #setMaxMergeDocs
    */
   public int getMaxMergeDocs() {
-    return getLogDocMergePolicy().getMaxMergeDocs();
+    return getLogMergePolicy().getMaxMergeDocs();
   }
 
   /**

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java?rev=582384&r1=582383&r2=582384&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java Fri Oct  5 13:51:53 2007
@@ -41,7 +41,9 @@
 
   /** Sets the maximum size for a segment to be merged.
    *  When a segment is this size or larger it will never be
-   *  merged. */
+   *  merged.  Note that {@link #setMaxMergeDocs} is also
+   *  used to check whether a segment is too large for
+   *  merging (reaching either limit excludes the segment). */
   public void setMaxMergeMB(double mb) {
     maxMergeSize = (long) (mb*1024*1024);
   }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/LogDocMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/LogDocMergePolicy.java?rev=582384&r1=582383&r2=582384&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/LogDocMergePolicy.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/LogDocMergePolicy.java Fri Oct  5 13:51:53 2007
@@ -26,30 +26,16 @@
   /** Default minimum segment size.  @see setMinMergeDocs */
   public static final int DEFAULT_MIN_MERGE_DOCS = 1000;
 
-  /** Default maximum segment size.  A segment of this size
-   *  or larger will never be merged.  @see setMaxMergeDocs */
-  public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;
-
   public LogDocMergePolicy() {
     super();
     minMergeSize = DEFAULT_MIN_MERGE_DOCS;
-    maxMergeSize = DEFAULT_MAX_MERGE_DOCS;
+
+    // maxMergeSize is never used by LogDocMergePolicy; set
+    // it to Long.MAX_VALUE to disable it
+    maxMergeSize = Long.MAX_VALUE;
   }
   protected long size(SegmentInfo info) {
     return info.docCount;
-  }
-
-  /** Sets the maximum size for a segment to be merged.
-   *  When a segment is this size or larger it will never be
-   *  merged. */
-  public void setMaxMergeDocs(int maxMergeDocs) {
-    maxMergeSize = maxMergeDocs;
-  }
-
-  /** Get the maximum size for a segment to be merged.
-   *  @see #setMaxMergeDocs */
-  public int getMaxMergeDocs() {
-    return (int) maxMergeSize;
   }
 
   /** Sets the minimum size for the lowest level segments.

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=582384&r1=582383&r2=582384&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/LogMergePolicy.java Fri Oct  5 13:51:53 2007
@@ -49,10 +49,15 @@
    *  merged at a time */
   public static final int DEFAULT_MERGE_FACTOR = 10;
 
+  /** Default maximum segment size.  A segment of this size
+   *  or larger will never be merged.  @see #setMaxMergeDocs */
+  public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;
+
   private int mergeFactor = DEFAULT_MERGE_FACTOR;
 
   long minMergeSize;
   long maxMergeSize;
+  int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
 
   private boolean useCompoundFile = true;
   private boolean useCompoundDocStore = true;
@@ -219,6 +224,9 @@
       long size = size(info);
 
       // Refuse to import a segment that's too large
+      if (info.docCount > maxMergeDocs && info.dir != directory)
+        throw new IllegalArgumentException("Segment is too large (" + info.docCount + " docs vs max docs " + maxMergeDocs + ")");
+
       if (size >= maxMergeSize && info.dir != directory)
         throw new IllegalArgumentException("Segment is too large (" + size + " vs max size " + maxMergeSize + ")");
 
@@ -281,8 +289,10 @@
       int end = start + mergeFactor;
       while(end <= 1+upto) {
         boolean anyTooLarge = false;
-        for(int i=start;i<end;i++)
-          anyTooLarge |= size(infos.info(i)) >= maxMergeSize;
+        for(int i=start;i<end;i++) {
+          final SegmentInfo info = infos.info(i);
+          anyTooLarge |= (size(info) >= maxMergeSize || info.docCount >= maxMergeDocs);
+        }
 
         if (!anyTooLarge) {
           if (spec == null)
@@ -298,4 +308,18 @@
 
     return spec;
   }
+
+  /** Sets the maximum docs for a segment to be merged.
+   *  When a segment has this many docs or more it will never be
+   *  merged. */
+  public void setMaxMergeDocs(int maxMergeDocs) {
+    this.maxMergeDocs = maxMergeDocs;
+  }
+
+  /** Get the maximum docs for a segment to be merged.
+   *  @see #setMaxMergeDocs */
+  public int getMaxMergeDocs() {
+    return maxMergeDocs;
+  }
+
 }

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=582384&r1=582383&r2=582384&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexWriter.java Fri Oct  5 13:51:53 2007
@@ -1583,4 +1583,39 @@
     iw.close();
     dir.close();
   }
+
+  // Just intercepts all merges & verifies that we are never
+  // merging a segment with >= 20 (maxMergeDocs) docs
+  private class MyMergeScheduler implements MergeScheduler {
+    synchronized public void merge(IndexWriter writer)
+      throws CorruptIndexException, IOException {
+
+      while(true) {
+        MergePolicy.OneMerge merge = writer.getNextMerge();
+        if (merge == null)
+          break;
+        for(int i=0;i<merge.segments.size();i++)
+          assert merge.segments.info(i).docCount < 20;
+        writer.merge(merge);
+      }
+    }
+
+    public void close() {}
+  }
+
+  // LUCENE-1013
+  public void testSetMaxMergeDocs() throws IOException {
+    MockRAMDirectory dir = new MockRAMDirectory();
+    IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true);
+    iw.setMergeScheduler(new MyMergeScheduler());
+    iw.setMaxMergeDocs(20);
+    iw.setMaxBufferedDocs(2);
+    iw.setMergeFactor(2);
+    Document document = new Document();
+    document.add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.TOKENIZED,
+                           Field.TermVector.YES));
+    for(int i=0;i<177;i++)
+      iw.addDocument(document);
+    iw.close();
+  }
 }