You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2010/11/24 20:03:08 UTC

svn commit: r1038767 - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/backwards/src/ lucene/backwards/src/test/org/apache/lucene/analysis/ lucene/backwards/src/test/org/apache/lucene/document/ lucene/backwards/src/test/org/apache/lucene/index/ lu...

Author: mikemccand
Date: Wed Nov 24 19:03:05 2010
New Revision: 1038767

URL: http://svn.apache.org/viewvc?rev=1038767&view=rev
Log:
LUCENE-2773: don't build compound files for large merged segments (by default); default maxMergeMB to 2GB

Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_3x/lucene/backwards/src/   (props changed)
    lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/document/TestDateTools.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/document/TestNumberTools.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/backwards/src/test/org/apache/lucene/util/TestAttributeSource.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/build.xml   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemmer.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/tartarus/snowball/TestApp.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/smartcn/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/stempel/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/benchmark/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
    lucene/dev/branches/branch_3x/lucene/contrib/highlighter/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/test/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/icu/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/icu/lib/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyAnalyzer.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/BaseCharFilter.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/LengthFilter.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/LetterTokenizer.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/LowerCaseTokenizer.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/Tokenizer.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardFilter.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/UAX29Tokenizer.jflex   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/util/StringHelper.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/LuceneResourcesWikiPage.html   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/LuceneResourcesWikiPageURLs.txt   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/email.addresses.from.random.text.with.email.addresses.txt   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/random.text.with.email.addresses.txt   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/random.text.with.urls.txt   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/urls.from.random.text.with.urls.txt   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/collation/   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/document/TestDateTools.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/document/TestNumberTools.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java   (props changed)
    lucene/dev/branches/branch_3x/solr/   (props changed)
    lucene/dev/branches/branch_3x/solr/example/   (props changed)
    lucene/dev/branches/branch_3x/solr/lib/commons-httpclient-3.1.jar   (props changed)
    lucene/dev/branches/branch_3x/solr/lib/jcl-over-slf4j-1.5.5.jar   (props changed)
    lucene/dev/branches/branch_3x/solr/src/   (props changed)
    lucene/dev/branches/branch_3x/solr/src/common/org/apache/solr/common/   (props changed)
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/ShingleFilterFactory.java   (props changed)
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/SynonymFilter.java   (props changed)
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/SynonymMap.java   (props changed)
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/analysis/WordDelimiterIterator.java   (props changed)
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/response/   (props changed)
    lucene/dev/branches/branch_3x/solr/src/maven/solr-core-pom.xml.template   (props changed)
    lucene/dev/branches/branch_3x/solr/src/maven/solr-solrj-pom.xml.template   (props changed)
    lucene/dev/branches/branch_3x/solr/src/solrj/org/   (props changed)
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/   (props changed)
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestPatternTokenizerFactory.java   (props changed)
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java   (props changed)
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestShingleFilterFactory.java   (props changed)
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestSynonymFilter.java   (props changed)
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/analysis/TestTrimFilter.java   (props changed)
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/client/   (props changed)
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/handler/component/QueryElevationComponentTest.java   (props changed)
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/request/SimpleFacetsTest.java   (props changed)
    lucene/dev/branches/branch_3x/solr/src/test/org/apache/solr/update/AutoCommitTest.java   (props changed)
    lucene/dev/branches/branch_3x/solr/src/webapp/src/org/apache/solr/client/solrj/embedded/   (props changed)
    lucene/dev/branches/branch_3x/solr/src/webapp/web/admin/   (props changed)

Modified: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/CHANGES.txt?rev=1038767&r1=1038766&r2=1038767&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/CHANGES.txt Wed Nov 24 19:03:05 2010
@@ -118,6 +118,15 @@ Changes in runtime behavior
   worst-case free disk space required during optimize is now 3X the
   index size, when compound file is enabled (else 2X).  (Mike
   McCandless)
+
+* LUCENE-2773: LogMergePolicy accepts a double noCFSRatio (default =
+  0.1), which means any time a merged segment is greater than 10% of
+  the index size, it will be left in non-compound format even if
+  compound format is on.  Also, the default value for maxMergeSize was
+  changed from unlimited to 2GB, meaning segments larger than 2GB will
+  never be merged by default.  These changes were made to reduce peak
+  transient disk usage during optimize which increased due to
+  LUCENE-2762.  (Mike McCandless)
   
 API Changes
 
@@ -221,6 +230,15 @@ API Changes
 * LUCENE-2699: Update StandardTokenizer and UAX29Tokenizer to Unicode 6.0.0.
   (Steven Rowe)
    
+* LUCENE-2773: LogMergePolicy accepts a double noCFSRatio (default =
+  0.1), which means any time a merged segment is greater than 10% of
+  the index size, it will be left in non-compound format even if
+  compound format is on.  Also, the default value for maxMergeSize was
+  changed from unlimited to 2GB, meaning segments larger than 2GB will
+  never be merged by default.  These changes were made to reduce peak
+  transient disk usage during optimize which increased due to
+  LUCENE-2762.  (Mike McCandless)
+  
 Bug fixes
 
 * LUCENE-2216: OpenBitSet.hashCode returned different hash codes for

Modified: lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?rev=1038767&r1=1038766&r2=1038767&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Wed Nov 24 19:03:05 2010
@@ -17,35 +17,36 @@
 
 package org.apache.lucene.benchmark.byTask;
 
-import java.io.StringReader;
+import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileReader;
-import java.io.BufferedReader;
+import java.io.StringReader;
 import java.text.Collator;
 import java.util.List;
 import java.util.Locale;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.benchmark.BenchmarkTestCase;
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
 import org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker;
-import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;
-import org.apache.lucene.benchmark.byTask.tasks.CountingHighlighterTestTask;
 import org.apache.lucene.benchmark.byTask.stats.TaskStats;
+import org.apache.lucene.benchmark.byTask.tasks.CountingHighlighterTestTask;
+import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;
 import org.apache.lucene.collation.CollationKeyAnalyzer;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LogDocMergePolicy;
 import org.apache.lucene.index.LogMergePolicy;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermEnum;
 import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.SegmentInfos;
 import org.apache.lucene.index.SerialMergeScheduler;
-import org.apache.lucene.index.LogDocMergePolicy;
 import org.apache.lucene.index.TermFreqVector;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.store.Directory;
@@ -768,12 +769,9 @@ public class TestPerfTasksLogic extends 
     ir.close();
 
     // Make sure we have 3 segments:
-    final String[] files = benchmark.getRunData().getDirectory().listAll();
-    int cfsCount = 0;
-    for(int i=0;i<files.length;i++)
-      if (files[i].endsWith(".cfs"))
-        cfsCount++;
-    assertEquals(3, cfsCount);
+    SegmentInfos infos = new SegmentInfos();
+    infos.read(benchmark.getRunData().getDirectory());
+    assertEquals(3, infos.size());
   }
   
   /**

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java?rev=1038767&r1=1038766&r2=1038767&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java Wed Nov 24 19:03:05 2010
@@ -28,7 +28,7 @@ public class LogByteSizeMergePolicy exte
 
   /** Default maximum segment size.  A segment of this size
    *  or larger will never be merged.  @see setMaxMergeMB */
-  public static final double DEFAULT_MAX_MERGE_MB = Long.MAX_VALUE;
+  public static final double DEFAULT_MAX_MERGE_MB = 2048;
 
   public LogByteSizeMergePolicy() {
     minMergeSize = (long) (DEFAULT_MIN_MERGE_MB*1024*1024);

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1038767&r1=1038766&r2=1038767&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Wed Nov 24 19:03:05 2010
@@ -54,12 +54,19 @@ public abstract class LogMergePolicy ext
    *  or larger will never be merged.  @see setMaxMergeDocs */
   public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;
 
+  /** Default noCFSRatio.  If a merge's size is >= 10% of
+   *  the index, then we disable compound file for it.
+   *  @see setNoCFSRatio */
+  public static final double DEFAULT_NO_CFS_RATIO = 0.1;
+
   protected int mergeFactor = DEFAULT_MERGE_FACTOR;
 
   protected long minMergeSize;
   protected long maxMergeSize;
   protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
 
+  protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
+
   protected boolean calibrateSizeByDeletes = true;
   
   protected boolean useCompoundFile = true;
@@ -73,6 +80,23 @@ public abstract class LogMergePolicy ext
     IndexWriter w = writer.get();
     return w != null && w.verbose();
   }
+
+  /** @see setNoCFSRatio */
+  public double getNoCFSRatio() {
+    return noCFSRatio;
+  }
+
+  /** If a merged segment will be more than this percentage
+   *  of the total size of the index, leave the segment as
+   *  non-compound file even if compound file is enabled.
+   *  Set to 1.0 to always use CFS regardless of merge
+   *  size. */
+  public void setNoCFSRatio(double noCFSRatio) {
+    if (noCFSRatio < 0.0 || noCFSRatio > 1.0) {
+      throw new IllegalArgumentException("noCFSRatio must be 0.0 to 1.0 inclusive; got " + noCFSRatio);
+    }
+    this.noCFSRatio = noCFSRatio;
+  }
   
   protected void message(String message) {
     if (verbose())
@@ -207,7 +231,8 @@ public abstract class LogMergePolicy ext
     return !hasDeletions &&
       !info.hasSeparateNorms() &&
       info.dir == w.getDirectory() &&
-      info.getUseCompoundFile() == useCompoundFile;
+      (info.getUseCompoundFile() == useCompoundFile ||
+       (noCFSRatio != 1.0 && !info.getUseCompoundFile()));
   }
 
   /**
@@ -230,12 +255,12 @@ public abstract class LogMergePolicy ext
         // unless there is only 1 which is optimized.
         if (last - start - 1 > 1 || (start != last - 1 && !isOptimized(infos.info(start + 1)))) {
           // there is more than 1 segment to the right of this one, or an unoptimized single segment.
-          spec.add(new OneMerge(infos.range(start + 1, last), useCompoundFile));
+          spec.add(makeOneMerge(infos, infos.range(start + 1, last)));
         }
         last = start;
       } else if (last - start == mergeFactor) {
         // mergeFactor eligible segments were found, add them as a merge.
-        spec.add(new OneMerge(infos.range(start, last), useCompoundFile));
+        spec.add(makeOneMerge(infos, infos.range(start, last)));
         last = start;
       }
       --start;
@@ -243,7 +268,7 @@ public abstract class LogMergePolicy ext
 
     // Add any left-over segments, unless there is just 1 already optimized.
     if (last > 0 && (++start + 1 < last || !isOptimized(infos.info(start)))) {
-      spec.add(new OneMerge(infos.range(start, last), useCompoundFile));
+      spec.add(makeOneMerge(infos, infos.range(start, last)));
     }
 
     return spec.merges.size() == 0 ? null : spec;
@@ -260,7 +285,7 @@ public abstract class LogMergePolicy ext
     // First, enroll all "full" merges (size
     // mergeFactor) to potentially be run concurrently:
     while (last - maxNumSegments + 1 >= mergeFactor) {
-      spec.add(new OneMerge(infos.range(last-mergeFactor, last), useCompoundFile));
+      spec.add(makeOneMerge(infos, infos.range(last-mergeFactor, last)));
       last -= mergeFactor;
     }
 
@@ -272,7 +297,7 @@ public abstract class LogMergePolicy ext
         // Since we must optimize down to 1 segment, the
         // choice is simple:
         if (last > 1 || !isOptimized(infos.info(0))) {
-          spec.add(new OneMerge(infos.range(0, last), useCompoundFile));
+          spec.add(makeOneMerge(infos, infos.range(0, last)));
         }
       } else if (last > maxNumSegments) {
 
@@ -301,7 +326,7 @@ public abstract class LogMergePolicy ext
           }
         }
 
-        spec.add(new OneMerge(infos.range(bestStart, bestStart+finalMergeSize), useCompoundFile));
+        spec.add(makeOneMerge(infos, infos.range(bestStart, bestStart+finalMergeSize)));
       }
     }
     return spec.merges.size() == 0 ? null : spec;
@@ -389,7 +414,7 @@ public abstract class LogMergePolicy ext
           // deletions, so force a merge now:
           if (verbose())
             message("  add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
-          spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i), useCompoundFile));
+          spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, i)));
           firstSegmentWithDeletions = i;
         }
       } else if (firstSegmentWithDeletions != -1) {
@@ -398,7 +423,7 @@ public abstract class LogMergePolicy ext
         // mergeFactor segments
         if (verbose())
           message("  add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
-        spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i), useCompoundFile));
+        spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, i)));
         firstSegmentWithDeletions = -1;
       }
     }
@@ -406,7 +431,7 @@ public abstract class LogMergePolicy ext
     if (firstSegmentWithDeletions != -1) {
       if (verbose())
         message("  add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive");
-      spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, numSegments), useCompoundFile));
+      spec.add(makeOneMerge(segmentInfos, segmentInfos.range(firstSegmentWithDeletions, numSegments)));
     }
 
     return spec;
@@ -506,7 +531,7 @@ public abstract class LogMergePolicy ext
             spec = new MergeSpecification();
           if (verbose())
             message("    " + start + " to " + end + ": add this merge");
-          spec.add(new OneMerge(infos.range(start, end), useCompoundFile));
+          spec.add(makeOneMerge(infos, infos.range(start, end)));
         } else if (verbose())
           message("    " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
 
@@ -520,6 +545,29 @@ public abstract class LogMergePolicy ext
     return spec;
   }
 
+  protected OneMerge makeOneMerge(SegmentInfos infos, SegmentInfos infosToMerge) throws IOException {
+    final boolean doCFS;
+    if (!useCompoundFile) {
+      doCFS = false;
+    } else if (noCFSRatio == 1.0) {
+      doCFS = true;
+    } else {
+      
+      long totSize = 0;
+      for(SegmentInfo info : infos) {
+        totSize += size(info);
+      }
+      long mergeSize = 0;
+      for(SegmentInfo info : infosToMerge) {
+        mergeSize += size(info);
+      }
+
+      doCFS = mergeSize <= noCFSRatio * totSize;
+    }
+
+    return new OneMerge(infosToMerge, doCFS);
+  }
+
   /** <p>Determines the largest segment (measured by
    * document count) that may be merged with other segments.
    * Small values (e.g., less than 10,000) are best for

Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1038767&r1=1038766&r2=1038767&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java Wed Nov 24 19:03:05 2010
@@ -2501,13 +2501,13 @@ public class TestIndexWriter extends Luc
       files = Arrays.asList(dir.listAll());
       assertTrue(files.contains("_0.cfs"));
       // optimize created this
-      assertTrue(files.contains("_2.cfs"));
+      //assertTrue(files.contains("_2.cfs"));
       w.deleteUnusedFiles();
 
       files = Arrays.asList(dir.listAll());
       // r still holds this file open
       assertTrue(files.contains("_0.cfs"));
-      assertTrue(files.contains("_2.cfs"));
+      //assertTrue(files.contains("_2.cfs"));
 
       r.close();
       if (iter == 0) {
@@ -2520,7 +2520,7 @@ public class TestIndexWriter extends Luc
         files = Arrays.asList(dir.listAll());
         assertFalse(files.contains("_0.cfs"));
       }
-      assertTrue(files.contains("_2.cfs"));
+      //assertTrue(files.contains("_2.cfs"));
 
       w.close();
       r2.close();

Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java?rev=1038767&r1=1038766&r2=1038767&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java Wed Nov 24 19:03:05 2010
@@ -27,7 +27,6 @@ import java.io.Reader;
 
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
-import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.store.IndexInput;
@@ -1004,6 +1003,7 @@ public class TestIndexWriterExceptions e
 
       writer  = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()));
       ((LogMergePolicy) writer.getMergePolicy()).setUseCompoundFile(true);
+      ((LogMergePolicy) writer.getMergePolicy()).setNoCFSRatio(1.0);
 
       // add 100 documents
       for (int i = 0; i < 100; i++) {

Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java?rev=1038767&r1=1038766&r2=1038767&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java Wed Nov 24 19:03:05 2010
@@ -252,25 +252,5 @@ public class TestIndexWriterMergePolicy 
     if (upperBound * mergeFactor <= maxMergeDocs) {
       assertTrue(numSegments < mergeFactor);
     }
-
-    String[] files = writer.getDirectory().listAll();
-    int segmentCfsCount = 0;
-    for (int i = 0; i < files.length; i++) {
-      if (files[i].endsWith(".cfs")) {
-        segmentCfsCount++;
-      }
-    }
-    assertEquals("index=" + writer.segString(), segmentCount, segmentCfsCount);
-  }
-
-  /*
-  private void printSegmentDocCounts(IndexWriter writer) {
-    int segmentCount = writer.getSegmentCount();
-    System.out.println("" + segmentCount + " segments total");
-    for (int i = 0; i < segmentCount; i++) {
-      System.out.println("  segment " + i + " has " + writer.getDocCount(i)
-          + " docs");
-    }
   }
-  */
 }