You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2010/12/20 23:49:52 UTC

svn commit: r1051309 - in /lucene/dev/branches/bulkpostings: ./ lucene/ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ lucene/contrib/benchmark/src...

Author: rmuir
Date: Mon Dec 20 22:49:51 2010
New Revision: 1051309

URL: http://svn.apache.org/viewvc?rev=1051309&view=rev
Log:
merge trunk (1050789:1051305)

Modified:
    lucene/dev/branches/bulkpostings/   (props changed)
    lucene/dev/branches/bulkpostings/lucene/   (props changed)
    lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocData.java
    lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
    lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java
    lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java
    lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
    lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java
    lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CheckIndex.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriter.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MergePolicy.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/NumericUtils.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/function/FunctionTestSetup.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/MockDirectoryWrapper.java
    lucene/dev/branches/bulkpostings/solr/   (props changed)

Modified: lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocData.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocData.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocData.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocData.java Mon Dec 20 22:49:51 2010
@@ -29,6 +29,7 @@ public class DocData {
   private String body;
   private String title;
   private String date;
+  private int id;
   private Properties props;
   
   public void clear() {
@@ -37,6 +38,7 @@ public class DocData {
     title = null;
     date = null;
     props = null;
+    id = -1;
   }
   
   public String getBody() {
@@ -57,6 +59,10 @@ public class DocData {
     return name;
   }
 
+  public int getID() {
+    return id;
+  }
+
   public Properties getProps() {
     return props;
   }
@@ -85,6 +91,10 @@ public class DocData {
     this.name = name;
   }
 
+  public void setID(int id) {
+    this.id = id;
+  }
+
   public void setProps(Properties props) {
     this.props = props;
   }

Modified: lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java Mon Dec 20 22:49:51 2010
@@ -20,14 +20,21 @@ package org.apache.lucene.benchmark.byTa
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
 import java.util.HashMap;
+import java.util.Calendar;
 import java.util.Map;
 import java.util.Properties;
+import java.util.Locale;
 import java.util.Random;
+import java.util.Date;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.text.SimpleDateFormat;
+import java.text.ParsePosition;
 
 import org.apache.lucene.benchmark.byTask.utils.Config;
 import org.apache.lucene.benchmark.byTask.utils.Format;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericField;
 import org.apache.lucene.document.Field.Index;
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.Field.TermVector;
@@ -82,6 +89,7 @@ public class DocMaker {
   static class DocState {
     
     private final Map<String,Field> fields;
+    private final Map<String,NumericField> numericFields;
     private final boolean reuseFields;
     final Document doc;
     DocData docData = new DocData();
@@ -92,6 +100,7 @@ public class DocMaker {
       
       if (reuseFields) {
         fields =  new HashMap<String,Field>();
+        numericFields = new HashMap<String,NumericField>();
         
         // Initialize the map with the default fields.
         fields.put(BODY_FIELD, new Field(BODY_FIELD, "", bodyStore, bodyIndex, termVector));
@@ -99,9 +108,13 @@ public class DocMaker {
         fields.put(DATE_FIELD, new Field(DATE_FIELD, "", store, index, termVector));
         fields.put(ID_FIELD, new Field(ID_FIELD, "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
         fields.put(NAME_FIELD, new Field(NAME_FIELD, "", store, index, termVector));
+
+        numericFields.put(DATE_MSEC_FIELD, new NumericField(DATE_MSEC_FIELD));
+        numericFields.put(TIME_SEC_FIELD, new NumericField(TIME_SEC_FIELD));
         
         doc = new Document();
       } else {
+        numericFields = null;
         fields = null;
         doc = null;
       }
@@ -124,18 +137,42 @@ public class DocMaker {
       }
       return f;
     }
+
+    NumericField getNumericField(String name) {
+      if (!reuseFields) {
+        return new NumericField(name);
+      }
+
+      NumericField f = numericFields.get(name);
+      if (f == null) {
+        f = new NumericField(name);
+        numericFields.put(name, f);
+      }
+      return f;
+    }
   }
   
-  private int numDocsCreated = 0;
   private boolean storeBytes = false;
 
+  private static class DateUtil {
+    public SimpleDateFormat parser = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss", Locale.US);
+    public Calendar cal = Calendar.getInstance();
+    public ParsePosition pos = new ParsePosition(0);
+    public DateUtil() {
+      parser.setLenient(true);
+    }
+  }
+
   // leftovers are thread local, because it is unsafe to share residues between threads
   private ThreadLocal<LeftOver> leftovr = new ThreadLocal<LeftOver>();
   private ThreadLocal<DocState> docState = new ThreadLocal<DocState>();
+  private ThreadLocal<DateUtil> dateParsers = new ThreadLocal<DateUtil>();
 
   public static final String BODY_FIELD = "body";
   public static final String TITLE_FIELD = "doctitle";
   public static final String DATE_FIELD = "docdate";
+  public static final String DATE_MSEC_FIELD = "docdatenum";
+  public static final String TIME_SEC_FIELD = "doctimesecnum";
   public static final String ID_FIELD = "docid";
   public static final String BYTES_FIELD = "bytes";
   public static final String NAME_FIELD = "docname";
@@ -155,6 +192,7 @@ public class DocMaker {
   private int lastPrintedNumUniqueTexts = 0;
 
   private long lastPrintedNumUniqueBytes = 0;
+  private final AtomicInteger numDocsCreated = new AtomicInteger();
 
   private int printNum = 0;
 
@@ -169,7 +207,16 @@ public class DocMaker {
     
     // Set ID_FIELD
     Field idField = ds.getField(ID_FIELD, storeVal, Index.NOT_ANALYZED_NO_NORMS, termVecVal);
-    idField.setValue("doc" + (r != null ? r.nextInt(updateDocIDLimit) : incrNumDocsCreated()));
+    int id;
+    if (r != null) {
+      id = r.nextInt(updateDocIDLimit);
+    } else {
+      id = docData.getID();
+      if (id == -1) {
+        id = numDocsCreated.getAndIncrement();
+      }
+    }
+    idField.setValue(Integer.toString(id));
     doc.add(idField);
     
     // Set NAME_FIELD
@@ -181,13 +228,39 @@ public class DocMaker {
     doc.add(nameField);
     
     // Set DATE_FIELD
-    String date = docData.getDate();
+    DateUtil util = dateParsers.get();
+    if (util == null) {
+      util = new DateUtil();
+      dateParsers.set(util);
+    }
+    Date date = null;
+    String dateString = docData.getDate();
+    if (dateString != null) {
+      util.pos.setIndex(0);
+      date = util.parser.parse(dateString, util.pos);
+      //System.out.println(dateString + " parsed to " + date);
+    } else {
+      dateString = "";
+    }
+    Field dateStringField = ds.getField(DATE_FIELD, storeVal, indexVal, termVecVal);
+    dateStringField.setValue(dateString);
+    doc.add(dateStringField);
+
     if (date == null) {
-      date = "";
+      // just set to right now
+      date = new Date();
     }
-    Field dateField = ds.getField(DATE_FIELD, storeVal, indexVal, termVecVal);
-    dateField.setValue(date);
+
+    NumericField dateField = ds.getNumericField(DATE_MSEC_FIELD);
+    dateField.setLongValue(date.getTime());
     doc.add(dateField);
+
+    util.cal.setTime(date);
+    final int sec = util.cal.get(Calendar.HOUR_OF_DAY)*3600 + util.cal.get(Calendar.MINUTE)*60 + util.cal.get(Calendar.SECOND);
+
+    NumericField timeSecField = ds.getNumericField(TIME_SEC_FIELD);
+    timeSecField.setIntValue(sec);
+    doc.add(timeSecField);
     
     // Set TITLE_FIELD
     String title = docData.getTitle();
@@ -252,10 +325,6 @@ public class DocMaker {
     return ds;
   }
 
-  protected synchronized int incrNumDocsCreated() {
-    return numDocsCreated++;
-  }
-
   /**
    * Closes the {@link DocMaker}. The base implementation closes the
    * {@link ContentSource}, and it can be overridden to do more work (but make
@@ -363,7 +432,7 @@ public class DocMaker {
     // re-initiate since properties by round may have changed.
     setConfig(config);
     source.resetInputs();
-    numDocsCreated = 0;
+    numDocsCreated.set(0);
     resetLeftovers();
   }
   

Modified: lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java Mon Dec 20 22:49:51 2010
@@ -48,6 +48,7 @@ public class LineDocSource extends Conte
 
   private File file;
   private BufferedReader reader;
+  private int readCount;
 
   private synchronized void openFile() {
     try {
@@ -71,9 +72,12 @@ public class LineDocSource extends Conte
   
   @Override
   public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
-    String line;
+    final String line;
+    final int myID;
+    
     synchronized(this) {
       line = reader.readLine();
+      myID = readCount++;
       if (line == null) {
         if (!forever) {
           throw new NoMoreDataException();
@@ -96,6 +100,7 @@ public class LineDocSource extends Conte
     }
     // The date String was written in the format of DateTools.dateToString.
     docData.clear();
+    docData.setID(myID);
     docData.setBody(line.substring(1 + spot2, line.length()));
     docData.setTitle(line.substring(0, spot));
     docData.setDate(line.substring(1 + spot, spot2));

Modified: lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java Mon Dec 20 22:49:51 2010
@@ -26,6 +26,7 @@ import org.apache.lucene.benchmark.byTas
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.NumericField;
 
 /**
  * Simple task to test performance of tokenizers.  It just
@@ -67,7 +68,7 @@ public class ReadTokensTask extends Perf
     Analyzer analyzer = getRunData().getAnalyzer();
     int tokenCount = 0;
     for(final Fieldable field : fields) {
-      if (!field.isTokenized()) continue;
+      if (!field.isTokenized() || field instanceof NumericField) continue;
       
       final TokenStream stream;
       final TokenStream streamValue = field.tokenStreamValue();

Modified: lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Mon Dec 20 22:49:51 2010
@@ -475,8 +475,9 @@ public class TestPerfTasksLogic extends 
     FieldsEnum fields = MultiFields.getFields(reader).iterator();
     String fieldName = null;
     while((fieldName = fields.next()) != null) {
-      if (fieldName == DocMaker.ID_FIELD)
+      if (fieldName == DocMaker.ID_FIELD || fieldName == DocMaker.DATE_MSEC_FIELD || fieldName == DocMaker.TIME_SEC_FIELD) {
         continue;
+      }
       TermsEnum terms = fields.terms();
       DocsEnum docs = null;
       while(terms.next() != null) {

Modified: lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java Mon Dec 20 22:49:51 2010
@@ -56,7 +56,7 @@ public class BalancedSegmentMergePolicy 
   
   @Override
   protected long size(SegmentInfo info) throws IOException {
-    long byteSize = info.sizeInBytes();
+    long byteSize = info.sizeInBytes(true);
     float delRatio = (info.docCount <= 0 ? 0.0f : ((float)info.getDelCount() / (float)info.docCount));
     return (info.docCount <= 0 ?  byteSize : (long)((1.0f - delRatio) * byteSize));
   }

Modified: lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/IndexSplitter.java Mon Dec 20 22:49:51 2010
@@ -26,7 +26,6 @@ import java.text.DecimalFormat;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.store.FSDirectory;
 
@@ -108,7 +107,7 @@ public class IndexSplitter {
     DecimalFormat formatter = new DecimalFormat("###,###.###");
     for (int x = 0; x < infos.size(); x++) {
       SegmentInfo info = infos.info(x);
-      String sizeStr = formatter.format(info.sizeInBytes());
+      String sizeStr = formatter.format(info.sizeInBytes(true));
       System.out.println(info.name + " " + sizeStr);
     }
   }

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CheckIndex.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CheckIndex.java Mon Dec 20 22:49:51 2010
@@ -417,8 +417,8 @@ public class CheckIndex {
         segInfoStat.hasProx = info.getHasProx();
         msg("    numFiles=" + info.files().size());
         segInfoStat.numFiles = info.files().size();
-        msg("    size (MB)=" + nf.format(info.sizeInBytes()/(1024.*1024.)));
-        segInfoStat.sizeMB = info.sizeInBytes()/(1024.*1024.);
+        segInfoStat.sizeMB = info.sizeInBytes(true)/(1024.*1024.);
+        msg("    size (MB)=" + nf.format(segInfoStat.sizeMB));
         Map<String,String> diagnostics = info.getDiagnostics();
         segInfoStat.diagnostics = diagnostics;
         if (diagnostics.size() > 0) {

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java Mon Dec 20 22:49:51 2010
@@ -65,7 +65,6 @@ public class ConcurrentMergeScheduler ex
 
   protected Directory dir;
 
-  private boolean closed;
   protected IndexWriter writer;
   protected int mergeThreadCount;
 
@@ -147,18 +146,37 @@ public class ConcurrentMergeScheduler ex
    *  pause & unpause threads. */
   protected synchronized void updateMergeThreads() {
 
-    CollectionUtil.mergeSort(mergeThreads, compareByMergeDocCount);
+    // Only look at threads that are alive & not in the
+    // process of stopping (ie have an active merge):
+    final List<MergeThread> activeMerges = new ArrayList<MergeThread>();
+
+    int threadIdx = 0;
+    while (threadIdx < mergeThreads.size()) {
+      final MergeThread mergeThread = mergeThreads.get(threadIdx);
+      if (!mergeThread.isAlive()) {
+        // Prune any dead threads
+        mergeThreads.remove(threadIdx);
+        continue;
+      }
+      if (mergeThread.getCurrentMerge() != null) {
+        activeMerges.add(mergeThread);
+      }
+      threadIdx++;
+    }
+
+    CollectionUtil.mergeSort(activeMerges, compareByMergeDocCount);
     
-    final int count = mergeThreads.size();
     int pri = mergeThreadPriority;
-    for(int i=0;i<count;i++) {
-      final MergeThread mergeThread = mergeThreads.get(i);
+    final int activeMergeCount = activeMerges.size();
+    for (threadIdx=0;threadIdx<activeMergeCount;threadIdx++) {
+      final MergeThread mergeThread = activeMerges.get(threadIdx);
       final MergePolicy.OneMerge merge = mergeThread.getCurrentMerge();
-      if (merge == null) {
+      if (merge == null) { 
         continue;
       }
+
       final boolean doPause;
-      if (i < count-maxThreadCount) {
+      if (threadIdx < activeMergeCount-maxThreadCount) {
         doPause = true;
       } else {
         doPause = false;
@@ -208,23 +226,29 @@ public class ConcurrentMergeScheduler ex
 
   @Override
   public void close() {
-    closed = true;
+    sync();
   }
 
-  public synchronized void sync() {
-    while(mergeThreadCount() > 0) {
-      if (verbose())
-        message("now wait for threads; currently " + mergeThreads.size() + " still running");
-      final int count = mergeThreads.size();
-      if (verbose()) {
-        for(int i=0;i<count;i++)
-          message("    " + i + ": " + mergeThreads.get(i));
+  /** Wait for any running merge threads to finish */
+  public void sync() {
+    while(true) {
+      MergeThread toSync = null;
+      synchronized(this) {
+        for(MergeThread t : mergeThreads) {
+          if (t.isAlive()) {
+            toSync = t;
+            break;
+          }
+        }
       }
-      
-      try {
-        wait();
-      } catch (InterruptedException ie) {
-        throw new ThreadInterruptedException(ie);
+      if (toSync != null) {
+        try {
+          toSync.join();
+        } catch (InterruptedException ie) {
+          throw new ThreadInterruptedException(ie);
+        }
+      } else {
+        break;
       }
     }
   }
@@ -232,9 +256,12 @@ public class ConcurrentMergeScheduler ex
   private synchronized int mergeThreadCount() {
     int count = 0;
     final int numThreads = mergeThreads.size();
-    for(int i=0;i<numThreads;i++)
-      if (mergeThreads.get(i).isAlive())
+    for(int i=0;i<numThreads;i++) {
+      final MergeThread t = mergeThreads.get(i);
+      if (t.isAlive() && t.getCurrentMerge() != null) {
         count++;
+      }
+    }
     return count;
   }
 
@@ -311,11 +338,17 @@ public class ConcurrentMergeScheduler ex
           // merge:
           merger = getMergeThread(writer, merge);
           mergeThreads.add(merger);
-          updateMergeThreads();
-          if (verbose())
+          if (verbose()) {
             message("    launch new thread [" + merger.getName() + "]");
+          }
 
           merger.start();
+
+          // Must call this after starting the thread else
+          // the new thread is removed from mergeThreads
+          // (since it's not alive yet):
+          updateMergeThreads();
+
           success = true;
         }
       } finally {
@@ -408,8 +441,6 @@ public class ConcurrentMergeScheduler ex
             if (verbose())
               message("  merge thread: do another merge " + merge.segString(dir));
           } else {
-            done = true;
-            updateMergeThreads();
             break;
           }
         }
@@ -428,11 +459,10 @@ public class ConcurrentMergeScheduler ex
           }
         }
       } finally {
+        done = true;
         synchronized(ConcurrentMergeScheduler.this) {
-          ConcurrentMergeScheduler.this.notifyAll();
-          boolean removed = mergeThreads.remove(this);
-          assert removed;
           updateMergeThreads();
+          ConcurrentMergeScheduler.this.notifyAll();
         }
       }
     }

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Mon Dec 20 22:49:51 2010
@@ -592,7 +592,7 @@ final class DocumentsWriter {
         threads.add(threadState.consumer);
       }
 
-      long startNumBytesUsed = bytesUsed();
+      double startMBUsed = bytesUsed()/1024./1024.;
 
       consumer.flush(threads, flushState);
       newSegment.setHasVectors(flushState.hasVectors);
@@ -622,11 +622,13 @@ final class DocumentsWriter {
 
       if (infoStream != null) {
         message("flush: segment=" + newSegment);
-        final long newSegmentSize = newSegment.sizeInBytes();
-        message("  ramUsed=" + nf.format(startNumBytesUsed / 1024. / 1024.) + " MB" +
-            " newFlushedSize=" + nf.format(newSegmentSize / 1024 / 1024) + " MB" +
-            " docs/MB=" + nf.format(numDocs / (newSegmentSize / 1024. / 1024.)) +
-            " new/old=" + nf.format(100.0 * newSegmentSize / startNumBytesUsed) + "%");
+        final double newSegmentSizeNoStore = newSegment.sizeInBytes(false)/1024./1024.;
+        final double newSegmentSize = newSegment.sizeInBytes(true)/1024./1024.;
+        message("  ramUsed=" + nf.format(startMBUsed) + " MB" +
+                " newFlushedSize=" + nf.format(newSegmentSize) + " MB" +
+                " (" + nf.format(newSegmentSizeNoStore) + " MB w/o doc stores)" +
+                " docs/MB=" + nf.format(numDocs / newSegmentSize) +
+                " new/old=" + nf.format(100.0 * newSegmentSizeNoStore / startMBUsed) + "%");
       }
 
       success = true;

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriter.java Mon Dec 20 22:49:51 2010
@@ -830,7 +830,7 @@ public class IndexWriter implements Clos
   private FieldInfos getCurrentFieldInfos() throws IOException {
     final FieldInfos fieldInfos;
     if (segmentInfos.size() > 0) {
-      if (segmentInfos.getFormat() > DefaultSegmentInfosWriter.FORMAT_4_0) {
+      if (segmentInfos.getFormat() > DefaultSegmentInfosWriter.FORMAT_HAS_VECTORS) {
         // Pre-4.0 index.  In this case we sweep all
         // segments, merging their FieldInfos:
         fieldInfos = new FieldInfos();
@@ -2923,17 +2923,10 @@ public class IndexWriter implements Clos
     if (merge.isAborted())
       return;
 
-    boolean hasVectors = false;
-    for (SegmentInfo sourceSegment : merge.segments) {
-      if (sourceSegment.getHasVectors()) {
-        hasVectors = true;
-      }
-    }
-
     // Bind a new segment name here so even with
     // ConcurrentMergePolicy we keep deterministic segment
     // names.
-    merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, false, null, hasVectors);
+    merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, false, null, false);
 
     Map<String,String> details = new HashMap<String,String>();
     details.put("optimize", Boolean.toString(merge.optimize));
@@ -3070,13 +3063,15 @@ public class IndexWriter implements Clos
     SegmentInfos sourceSegments = merge.segments;
     final int numSegments = sourceSegments.size();
 
-    if (infoStream != null)
-      message("merging " + merge.segString(directory));
-
     SegmentMerger merger = new SegmentMerger(directory, termIndexInterval, mergedName, merge,
                                              codecs, payloadProcessorProvider,
                                              ((FieldInfos) docWriter.getFieldInfos().clone()));
 
+    if (infoStream != null) {
+      message("merging " + merge.segString(directory) + " mergeVectors=" + merger.fieldInfos().hasVectors());
+    }
+
+    merge.info.setHasVectors(merger.fieldInfos().hasVectors());
     merge.readers = new SegmentReader[numSegments];
     merge.readersClone = new SegmentReader[numSegments];
 

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Mon Dec 20 22:49:51 2010
@@ -184,7 +184,7 @@ public abstract class LogMergePolicy ext
   }
   
   protected long sizeBytes(SegmentInfo info) throws IOException {
-    long byteSize = info.sizeInBytes();
+    long byteSize = info.sizeInBytes(true);
     if (calibrateSizeByDeletes) {
       int delCount = writer.get().numDeletedDocs(info);
       double delRatio = (info.docCount <= 0 ? 0.0f : ((float)delCount / (float)info.docCount));

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MergePolicy.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MergePolicy.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MergePolicy.java Mon Dec 20 22:49:51 2010
@@ -165,7 +165,7 @@ public abstract class MergePolicy implem
     public long totalBytesSize() throws IOException {
       long total = 0;
       for (SegmentInfo info : segments) {
-        total += info.sizeInBytes();
+        total += info.sizeInBytes(true);
       }
       return total;
     }

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentInfo.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentInfo.java Mon Dec 20 22:49:51 2010
@@ -220,13 +220,16 @@ public final class SegmentInfo {
   
   /** Returns total size in bytes of all of files used by
    *  this segment. */
-  public long sizeInBytes() throws IOException {
+  public long sizeInBytes(boolean includeDocStores) throws IOException {
     if (sizeInBytes == -1) {
       List<String> files = files();
       final int size = files.size();
       sizeInBytes = 0;
       for(int i=0;i<size;i++) {
         final String fileName = files.get(i);
+        if (!includeDocStores && IndexFileNames.isDocStoreFile(fileName)) {
+          continue;
+        }
         // We don't count bytes used by a shared doc store
         // against this segment:
         if (docStoreOffset == -1 || !IndexFileNames.isDocStoreFile(fileName))

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DefaultSegmentInfosWriter.java Mon Dec 20 22:49:51 2010
@@ -35,13 +35,13 @@ public class DefaultSegmentInfosWriter e
    *  diagnostics storage, and switches userData to Map */
   public static final int FORMAT_DIAGNOSTICS = -9;
 
+  /** Each segment records whether it has term vectors */
+  public static final int FORMAT_HAS_VECTORS = -10;
+
   /** Each segment records whether its postings are written
    *  in the new flex format */
   public static final int FORMAT_4_0 = -11;
 
-  /** Each segment records whether it has term vectors */
-  public static final int FORMAT_HAS_VECTORS = -10;
-
   /** This must always point to the most recent file format.
    * whenever you add a new format, make it 1 smaller (negative version logic)! */
   public static final int FORMAT_CURRENT = FORMAT_4_0;

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java Mon Dec 20 22:49:51 2010
@@ -57,7 +57,7 @@ public abstract class TermsIndexReaderBa
 
     public abstract void getIndexOffset(long ord, TermsIndexResult result) throws IOException;
 
-    /** Call this sequentially for each term encoutered,
+    /** Call this sequentially for each term encountered,
      *  after calling {@link #getIndexOffset}. */
     public abstract boolean isIndexTerm(long ord, int docFreq, boolean onlyLoaded) throws IOException;
 

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/NumericUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/NumericUtils.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/NumericUtils.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/NumericUtils.java Mon Dec 20 22:49:51 2010
@@ -172,7 +172,7 @@ public final class NumericUtils {
   public static int getPrefixCodedLongShift(final BytesRef val) {
     final int shift = val.bytes[val.offset] - SHIFT_START_LONG;
     if (shift > 63 || shift < 0)
-      throw new NumberFormatException("Invalid shift value in prefixCoded bytes (is encoded value really an INT?)");
+      throw new NumberFormatException("Invalid shift value (" + shift + ") in prefixCoded bytes (is encoded value really an INT?)");
     return shift;
   }
 

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java Mon Dec 20 22:49:51 2010
@@ -184,6 +184,7 @@ public class TestConcurrentMergeSchedule
       writer = new IndexWriter(directory, newIndexWriterConfig(
           TEST_VERSION_CURRENT, new MockAnalyzer())
           .setOpenMode(OpenMode.APPEND).setMaxBufferedDocs(2));
+      writer.setInfoStream(VERBOSE ? System.out : null);
     }
 
     writer.close();

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java Mon Dec 20 22:49:51 2010
@@ -851,10 +851,14 @@ public class TestIndexWriterExceptions e
     w.close();
 
     for(int i=0;i<200;i++) {
+      if (VERBOSE) {
+        System.out.println("TEST: iter " + i);
+      }
       MockDirectoryWrapper dir = new MockDirectoryWrapper(random, new RAMDirectory(startDir));
       conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer()).setMergeScheduler(new ConcurrentMergeScheduler());
       ((ConcurrentMergeScheduler) conf.getMergeScheduler()).setSuppressExceptions();
       w = new IndexWriter(dir, conf);
+      w.setInfoStream(VERBOSE ? System.out : null);
       dir.setRandomIOExceptionRate(0.5);
       try {
         w.optimize();

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java Mon Dec 20 22:49:51 2010
@@ -59,7 +59,7 @@ public class TestSizeBoundedOptimize ext
 
     SegmentInfos sis = new SegmentInfos();
     sis.read(dir);
-    double min = sis.info(0).sizeInBytes();
+    double min = sis.info(0).sizeInBytes(true);
 
     conf = newWriterConfig();
     LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/function/FunctionTestSetup.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/function/FunctionTestSetup.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/function/FunctionTestSetup.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/function/FunctionTestSetup.java Mon Dec 20 22:49:51 2010
@@ -93,6 +93,9 @@ public class FunctionTestSetup extends L
   @Before
   public void setUp() throws Exception {
     super.setUp();
+    if (VERBOSE) {
+      System.out.println("TEST: setUp");
+    }
     // prepare a small index with just a few documents.  
     dir = newDirectory();
     anlzr = new MockAnalyzer();
@@ -101,6 +104,7 @@ public class FunctionTestSetup extends L
       iwc.setMaxBufferedDocs(_TestUtil.nextInt(random, 2, 7));
     }
     RandomIndexWriter iw = new RandomIndexWriter(random, dir, iwc);
+    iw.w.setInfoStream(VERBOSE ? System.out : null);
     // add docs not exactly in natural ID order, to verify we do check the order of docs by scores
     int remaining = N_DOCS;
     boolean done[] = new boolean[N_DOCS];
@@ -115,9 +119,15 @@ public class FunctionTestSetup extends L
       remaining --;
     }
     if (!doMultiSegment) {
+      if (VERBOSE) {
+        System.out.println("TEST: setUp optimize");
+      }
       iw.optimize();
     }
     iw.close();
+    if (VERBOSE) {
+      System.out.println("TEST: setUp done close");
+    }
   }
 
   private void addDoc(RandomIndexWriter iw, int i) throws Exception {

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/MockDirectoryWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/MockDirectoryWrapper.java?rev=1051309&r1=1051308&r2=1051309&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/MockDirectoryWrapper.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/MockDirectoryWrapper.java Mon Dec 20 22:49:51 2010
@@ -18,18 +18,20 @@ package org.apache.lucene.store;
  */
 
 import java.io.Closeable;
-import java.io.IOException;
 import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.IdentityHashMap;
 import java.util.Iterator;
-import java.util.Random;
 import java.util.Map;
-import java.util.HashMap;
-import java.util.HashSet;
+import java.util.Random;
 import java.util.Set;
-import java.util.ArrayList;
+
+import org.apache.lucene.util.LuceneTestCase;
 
 /**
  * This is a Directory Wrapper that adds methods
@@ -220,6 +222,10 @@ public class MockDirectoryWrapper extend
     if (randomIOExceptionRate > 0.0) {
       int number = Math.abs(randomState.nextInt() % 1000);
       if (number < randomIOExceptionRate*1000) {
+        if (LuceneTestCase.VERBOSE) {
+          System.out.println(Thread.currentThread().getName() + ": MockDirectoryWrapper: now throw random exception");
+          new Throwable().printStackTrace(System.out);
+        }
         throw new IOException("a random IOException");
       }
     }