Posted to commits@lucene.apache.org by si...@apache.org on 2011/05/02 15:51:22 UTC

svn commit: r1098566 [7/22] - in /lucene/dev/branches/docvalues: ./ dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/lucene/contrib/ant/ dev-tools/idea/lucene/contrib/db/bdb-je/ dev-tools/idea/lucene/contrib/db/bdb/ dev-tools/idea/lucene/contrib...

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java Mon May  2 13:50:57 2011
@@ -18,7 +18,7 @@ package org.apache.lucene.index;
  */
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.index.DocumentsWriter.IndexingChain;
+import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
 import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.search.IndexSearcher;
@@ -41,7 +41,7 @@ import org.apache.lucene.util.Version;
  * IndexWriterConfig conf = new IndexWriterConfig(analyzer);
  * conf.setter1().setter2();
  * </pre>
- * 
+ *
  * @since 3.1
  */
 public final class IndexWriterConfig implements Cloneable {
@@ -56,7 +56,7 @@ public final class IndexWriterConfig imp
    * </ul>
    */
   public static enum OpenMode { CREATE, APPEND, CREATE_OR_APPEND }
-  
+
   /** Default value is 32. Change using {@link #setTermIndexInterval(int)}. */
   public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here
 
@@ -77,23 +77,19 @@ public final class IndexWriterConfig imp
 
   /**
    * Default value for the write lock timeout (1,000 ms).
-   * 
+   *
    * @see #setDefaultWriteLockTimeout(long)
    */
   public static long WRITE_LOCK_TIMEOUT = 1000;
 
-  /** The maximum number of simultaneous threads that may be
-   *  indexing documents at once in IndexWriter; if more
-   *  than this many threads arrive they will wait for
-   *  others to finish. */
-  public final static int DEFAULT_MAX_THREAD_STATES = 8;
-
   /** Default setting for {@link #setReaderPooling}. */
   public final static boolean DEFAULT_READER_POOLING = false;
 
   /** Default value is 1. Change using {@link #setReaderTermsIndexDivisor(int)}. */
   public static final int DEFAULT_READER_TERMS_INDEX_DIVISOR = IndexReader.DEFAULT_TERMS_INDEX_DIVISOR;
 
+  /** Default value is 1945. Change using {@link #setRAMPerThreadHardLimitMB(int)} */
+  public static final int DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB = 1945;
   /**
    * Sets the default (for any instance) maximum time to wait for a write lock
    * (in milliseconds).
@@ -105,7 +101,7 @@ public final class IndexWriterConfig imp
   /**
    * Returns the default write lock timeout for newly instantiated
    * IndexWriterConfigs.
-   * 
+   *
    * @see #setDefaultWriteLockTimeout(long)
    */
   public static long getDefaultWriteLockTimeout() {
@@ -127,10 +123,12 @@ public final class IndexWriterConfig imp
   private volatile IndexReaderWarmer mergedSegmentWarmer;
   private volatile CodecProvider codecProvider;
   private volatile MergePolicy mergePolicy;
-  private volatile int maxThreadStates;
+  private volatile DocumentsWriterPerThreadPool indexerThreadPool;
   private volatile boolean readerPooling;
   private volatile int readerTermsIndexDivisor;
-  
+  private volatile FlushPolicy flushPolicy;
+  private volatile int perThreadHardLimitMB;
+
   private Version matchVersion;
 
   /**
@@ -153,15 +151,16 @@ public final class IndexWriterConfig imp
     maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS;
     ramBufferSizeMB = DEFAULT_RAM_BUFFER_SIZE_MB;
     maxBufferedDocs = DEFAULT_MAX_BUFFERED_DOCS;
-    indexingChain = DocumentsWriter.defaultIndexingChain;
+    indexingChain = DocumentsWriterPerThread.defaultIndexingChain;
     mergedSegmentWarmer = null;
     codecProvider = CodecProvider.getDefault();
-    mergePolicy = new LogByteSizeMergePolicy();
-    maxThreadStates = DEFAULT_MAX_THREAD_STATES;
+    mergePolicy = new TieredMergePolicy();
     readerPooling = DEFAULT_READER_POOLING;
+    indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool();
     readerTermsIndexDivisor = DEFAULT_READER_TERMS_INDEX_DIVISOR;
+    perThreadHardLimitMB = DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB;
   }
-  
+
   @Override
   public Object clone() {
     // Shallow clone is the only thing that's possible, since parameters like
@@ -186,7 +185,7 @@ public final class IndexWriterConfig imp
     this.openMode = openMode;
     return this;
   }
-  
+
   /** Returns the {@link OpenMode} set by {@link #setOpenMode(OpenMode)}. */
   public OpenMode getOpenMode() {
     return openMode;
@@ -261,7 +260,7 @@ public final class IndexWriterConfig imp
   public SimilarityProvider getSimilarityProvider() {
     return similarityProvider;
   }
-  
+
   /**
    * Expert: set the interval between indexed terms. Large values cause less
    * memory to be used by IndexReader, but slow random-access to terms. Small
@@ -281,7 +280,7 @@ public final class IndexWriterConfig imp
    * In particular, <code>numUniqueTerms/interval</code> terms are read into
    * memory by an IndexReader, and, on average, <code>interval/2</code> terms
    * must be scanned for each random term access.
-   * 
+   *
    * @see #DEFAULT_TERM_INDEX_INTERVAL
    *
    * <p>Takes effect immediately, but only applies to newly
@@ -293,7 +292,7 @@ public final class IndexWriterConfig imp
 
   /**
    * Returns the interval between indexed terms.
-   * 
+   *
    * @see #setTermIndexInterval(int)
    */
   public int getTermIndexInterval() { // TODO: this should be private to the codec, not settable here
@@ -331,10 +330,10 @@ public final class IndexWriterConfig imp
     this.writeLockTimeout = writeLockTimeout;
     return this;
   }
-  
+
   /**
    * Returns allowed timeout when acquiring the write lock.
-   * 
+   *
    * @see #setWriteLockTimeout(long)
    */
   public long getWriteLockTimeout() {
@@ -343,15 +342,16 @@ public final class IndexWriterConfig imp
 
   /**
    * Determines the minimal number of delete terms required before the buffered
-   * in-memory delete terms are applied and flushed. If there are documents
-   * buffered in memory at the time, they are merged and a new segment is
-   * created.
-
-   * <p>Disabled by default (writer flushes by RAM usage).
+   * in-memory delete terms and queries are applied and flushed.
+   * <p>Disabled by default (writer flushes by RAM usage).</p>
+   * <p>
+   * NOTE:  This setting won't trigger a segment flush.
+   * </p>
    * 
    * @throws IllegalArgumentException if maxBufferedDeleteTerms
    * is enabled but smaller than 1
    * @see #setRAMBufferSizeMB
+   * @see #setFlushPolicy(FlushPolicy)
    *
    * <p>Takes effect immediately, but only the next time a
    * document is added, updated or deleted.
@@ -366,9 +366,9 @@ public final class IndexWriterConfig imp
   }
 
   /**
-   * Returns the number of buffered deleted terms that will trigger a flush if
-   * enabled.
-   * 
+   * Returns the number of buffered deleted terms that will trigger a flush of all
+   * buffered deletes if enabled.
+   *
    * @see #setMaxBufferedDeleteTerms(int)
    */
   public int getMaxBufferedDeleteTerms() {
@@ -380,45 +380,50 @@ public final class IndexWriterConfig imp
    * and deletions before they are flushed to the Directory. Generally for
    * faster indexing performance it's best to flush by RAM usage instead of
    * document count and use as large a RAM buffer as you can.
-   * 
    * <p>
    * When this is set, the writer will flush whenever buffered documents and
    * deletions use this much RAM. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent
    * triggering a flush due to RAM usage. Note that if flushing by document
    * count is also enabled, then the flush will be triggered by whichever comes
    * first.
-   * 
+   * <p>
+   * The maximum RAM limit is inherently determined by the JVM's available memory.
+   * Yet, an {@link IndexWriter} session can consume a significantly larger amount
+   * of memory than the given RAM limit, since this limit is only an indicator of
+   * when to flush memory resident documents to the Directory. Flushes are likely
+   * to happen concurrently while other threads are adding documents to the writer.
+   * For application stability the available memory in the JVM should be
+   * significantly larger than the RAM buffer used for indexing.
    * <p>
    * <b>NOTE</b>: the account of RAM usage for pending deletions is only
    * approximate. Specifically, if you delete by Query, Lucene currently has no
    * way to measure the RAM usage of individual Queries so the accounting will
    * under-estimate and you should compensate by either calling commit()
    * periodically yourself, or by using {@link #setMaxBufferedDeleteTerms(int)}
-   * to flush by count instead of RAM usage (each buffered delete Query counts 
-   * as one).
-   * 
+   * to flush and apply buffered deletes by count instead of RAM usage
+   * (for each buffered delete Query a constant number of bytes is used to estimate
+   * RAM usage). Note that enabling {@link #setMaxBufferedDeleteTerms(int)} will
+   * not trigger any segment flushes.
+   * <p>
+   * <b>NOTE</b>: It is not guaranteed that all memory resident documents are
+   * flushed once this limit is exceeded. Depending on the configured
+   * {@link FlushPolicy}, only a subset of the buffered documents is flushed and
+   * therefore only part of the RAM buffer is released.
    * <p>
-   * <b>NOTE</b>: because IndexWriter uses <code>int</code>s when managing its
-   * internal storage, the absolute maximum value for this setting is somewhat
-   * less than 2048 MB. The precise limit depends on various factors, such as
-   * how large your documents are, how many fields have norms, etc., so it's
-   * best to set this value comfortably under 2048.
    * 
-   * <p>
    * The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.
-   * 
+   * @see #setFlushPolicy(FlushPolicy)
+   * @see #setRAMPerThreadHardLimitMB(int)
+   *
    * <p>Takes effect immediately, but only the next time a
    * document is added, updated or deleted.
    *
    * @throws IllegalArgumentException
    *           if ramBufferSize is enabled but non-positive, or it disables
    *           ramBufferSize when maxBufferedDocs is already disabled
+   *           
    */
   public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) {
-    if (ramBufferSizeMB > 2048.0) {
-      throw new IllegalArgumentException("ramBufferSize " + ramBufferSizeMB
-          + " is too large; should be comfortably less than 2048");
-    }
     if (ramBufferSizeMB != DISABLE_AUTO_FLUSH && ramBufferSizeMB <= 0.0)
       throw new IllegalArgumentException(
           "ramBufferSize should be > 0.0 MB when enabled");
@@ -438,22 +443,22 @@ public final class IndexWriterConfig imp
    * Determines the minimal number of documents required before the buffered
    * in-memory documents are flushed as a new Segment. Large values generally
    * give faster indexing.
-   * 
+   *
    * <p>
    * When this is set, the writer will flush every maxBufferedDocs added
    * documents. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent triggering a
    * flush due to number of buffered documents. Note that if flushing by RAM
    * usage is also enabled, then the flush will be triggered by whichever comes
    * first.
-   * 
+   *
    * <p>
    * Disabled by default (writer flushes by RAM usage).
-   * 
+   *
    * <p>Takes effect immediately, but only the next time a
    * document is added, updated or deleted.
    *
    * @see #setRAMBufferSizeMB(double)
-   * 
+   * @see #setFlushPolicy(FlushPolicy)
    * @throws IllegalArgumentException
    *           if maxBufferedDocs is enabled but smaller than 2, or it disables
    *           maxBufferedDocs when ramBufferSize is already disabled
@@ -473,7 +478,7 @@ public final class IndexWriterConfig imp
   /**
    * Returns the number of buffered added documents that will trigger a flush if
    * enabled.
-   * 
+   *
    * @see #setMaxBufferedDocs(int)
    */
   public int getMaxBufferedDocs() {
@@ -519,32 +524,43 @@ public final class IndexWriterConfig imp
     return codecProvider;
   }
 
-  
+
   /**
    * Returns the current MergePolicy in use by this writer.
-   * 
+   *
    * @see #setMergePolicy(MergePolicy)
    */
   public MergePolicy getMergePolicy() {
     return mergePolicy;
   }
 
-  /**
-   * Sets the max number of simultaneous threads that may be indexing documents
-   * at once in IndexWriter. Values &lt; 1 are invalid and if passed
-   * <code>maxThreadStates</code> will be set to
-   * {@link #DEFAULT_MAX_THREAD_STATES}.
-   *
-   * <p>Only takes effect when IndexWriter is first created. */
-  public IndexWriterConfig setMaxThreadStates(int maxThreadStates) {
-    this.maxThreadStates = maxThreadStates < 1 ? DEFAULT_MAX_THREAD_STATES : maxThreadStates;
+  /** Expert: Sets the {@link DocumentsWriterPerThreadPool} instance used by the
+   * IndexWriter to assign thread-states to incoming indexing threads. If no
+   * {@link DocumentsWriterPerThreadPool} is set, {@link IndexWriter} will use
+   * {@link ThreadAffinityDocumentsWriterThreadPool} with the maximum number of
+   * thread-states set to {@link DocumentsWriterPerThreadPool#DEFAULT_MAX_THREAD_STATES}.
+   * </p>
+   * <p>
+   * NOTE: The given {@link DocumentsWriterPerThreadPool} instance must not be used with
+   * other {@link IndexWriter} instances once it has been initialized / associated with an
+   * {@link IndexWriter}.
+   * </p>
+   * <p>
+   * NOTE: This only takes effect when IndexWriter is first created.</p>*/
+  public IndexWriterConfig setIndexerThreadPool(DocumentsWriterPerThreadPool threadPool) {
+    if (threadPool == null) {
+      throw new IllegalArgumentException("DocumentsWriterPerThreadPool must not be null");
+    }
+    this.indexerThreadPool = threadPool;
     return this;
   }
 
-  /** Returns the max number of simultaneous threads that
-   *  may be indexing documents at once in IndexWriter. */
-  public int getMaxThreadStates() {
-    return maxThreadStates;
+  /** Returns the configured {@link DocumentsWriterPerThreadPool} instance.
+   * @see #setIndexerThreadPool(DocumentsWriterPerThreadPool)
+   * @return the configured {@link DocumentsWriterPerThreadPool} instance.*/
+  public DocumentsWriterPerThreadPool getIndexerThreadPool() {
+    return this.indexerThreadPool;
   }
 
   /** By default, IndexWriter does not pool the
@@ -572,10 +588,10 @@ public final class IndexWriterConfig imp
    *
    * <p>Only takes effect when IndexWriter is first created. */
   IndexWriterConfig setIndexingChain(IndexingChain indexingChain) {
-    this.indexingChain = indexingChain == null ? DocumentsWriter.defaultIndexingChain : indexingChain;
+    this.indexingChain = indexingChain == null ? DocumentsWriterPerThread.defaultIndexingChain : indexingChain;
     return this;
   }
-  
+
   /** Returns the indexing chain set on {@link #setIndexingChain(IndexingChain)}. */
   IndexingChain getIndexingChain() {
     return indexingChain;
@@ -604,6 +620,53 @@ public final class IndexWriterConfig imp
     return readerTermsIndexDivisor;
   }
   
+  /**
+   * Expert: Controls when segments are flushed to disk during indexing.
+   * The {@link FlushPolicy} is initialized during {@link IndexWriter} instantiation; once initialized,
+   * the given instance is bound to this {@link IndexWriter} and should not be used with another writer.
+   * @see #setMaxBufferedDeleteTerms(int)
+   * @see #setMaxBufferedDocs(int)
+   * @see #setRAMBufferSizeMB(double)
+   */
+  public IndexWriterConfig setFlushPolicy(FlushPolicy flushPolicy) {
+    this.flushPolicy = flushPolicy;
+    return this;
+  }
+
+  /**
+   * Expert: Sets the maximum memory consumption per thread that triggers a forced
+   * flush if exceeded. A {@link DocumentsWriterPerThread} is forcefully flushed
+   * once it exceeds this limit, even if {@link #getRAMBufferSizeMB()} has
+   * not been exceeded. This is a safety limit that prevents a
+   * {@link DocumentsWriterPerThread} from exhausting its address space, which is
+   * bounded by its internal 32 bit signed integer based memory addressing.
+   * The given value must be less than 2048 MB (2 GB).
+   * 
+   * @see #DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB
+   */
+  public IndexWriterConfig setRAMPerThreadHardLimitMB(int perThreadHardLimitMB) {
+    if (perThreadHardLimitMB <= 0 || perThreadHardLimitMB >= 2048) {
+      throw new IllegalArgumentException("PerThreadHardLimit must be greater than 0 and less than 2048MB");
+    }
+    this.perThreadHardLimitMB = perThreadHardLimitMB;
+    return this;
+  }
+
+  /**
+   * Returns the max amount of memory each {@link DocumentsWriterPerThread} can
+   * consume until forcefully flushed.
+   * @see #setRAMPerThreadHardLimitMB(int) 
+   */
+  public int getRAMPerThreadHardLimitMB() {
+    return perThreadHardLimitMB;
+  }
+
+  /**
+   * @see #setFlushPolicy(FlushPolicy)
+   */
+  public FlushPolicy getFlushPolicy() {
+    return flushPolicy;
+  }
+
   @Override
   public String toString() {
     StringBuilder sb = new StringBuilder();
@@ -623,9 +686,13 @@ public final class IndexWriterConfig imp
     sb.append("mergedSegmentWarmer=").append(mergedSegmentWarmer).append("\n");
     sb.append("codecProvider=").append(codecProvider).append("\n");
     sb.append("mergePolicy=").append(mergePolicy).append("\n");
-    sb.append("maxThreadStates=").append(maxThreadStates).append("\n");
+    sb.append("indexerThreadPool=").append(indexerThreadPool).append("\n");
     sb.append("readerPooling=").append(readerPooling).append("\n");
     sb.append("readerTermsIndexDivisor=").append(readerTermsIndexDivisor).append("\n");
+    sb.append("flushPolicy=").append(flushPolicy).append("\n");
+    sb.append("perThreadHardLimitMB=").append(perThreadHardLimitMB).append("\n");
+
     return sb.toString();
   }
+
 }
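
Taken together, the IndexWriterConfig hunks above replace the fixed maxThreadStates setting with a pluggable DocumentsWriterPerThreadPool, add a per-thread RAM hard limit and a FlushPolicy hook, and switch the default merge policy to TieredMergePolicy. The sketch below shows how an application might exercise the new setters; it assumes the IndexWriterConfig(Version, Analyzer) constructor and the Version.LUCENE_40 constant on this 4.0-dev branch, so treat it as illustrative rather than as the committed API surface.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

final class IndexWriterConfigExample {
  // Builds a writer using the configuration surface introduced above.
  static IndexWriter openWriter(Directory dir, Analyzer analyzer) throws IOException {
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer);
    conf.setRAMBufferSizeMB(64.0);            // soft trigger: flush once buffered docs + deletes use ~64 MB
    conf.setRAMPerThreadHardLimitMB(512);     // hard per-DocumentsWriterPerThread cap; must be in (0, 2048)
    conf.setMaxBufferedDeleteTerms(10000);    // applies buffered deletes by count, without forcing a segment flush
    conf.setMergePolicy(new TieredMergePolicy()); // now the default; shown explicitly here
    return new IndexWriter(dir, conf);
  }
}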

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IntBlockPool.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IntBlockPool.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IntBlockPool.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IntBlockPool.java Mon May  2 13:50:57 2011
@@ -1,5 +1,7 @@
 package org.apache.lucene.index;
 
+import java.util.Arrays;
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -22,24 +24,24 @@ final class IntBlockPool {
   public int[][] buffers = new int[10][];
 
   int bufferUpto = -1;                        // Which buffer we are upto
-  public int intUpto = DocumentsWriter.INT_BLOCK_SIZE;             // Where we are in head buffer
+  public int intUpto = DocumentsWriterPerThread.INT_BLOCK_SIZE;             // Where we are in head buffer
 
   public int[] buffer;                              // Current head buffer
-  public int intOffset = -DocumentsWriter.INT_BLOCK_SIZE;          // Current head offset
+  public int intOffset = -DocumentsWriterPerThread.INT_BLOCK_SIZE;          // Current head offset
 
-  final private DocumentsWriter docWriter;
+  final private DocumentsWriterPerThread docWriter;
 
-  public IntBlockPool(DocumentsWriter docWriter) {
+  public IntBlockPool(DocumentsWriterPerThread docWriter) {
     this.docWriter = docWriter;
   }
 
   public void reset() {
     if (bufferUpto != -1) {
-      if (bufferUpto > 0)
-        // Recycle all but the first buffer
-        docWriter.recycleIntBlocks(buffers, 1, 1+bufferUpto);
-
       // Reuse first buffer
+      if (bufferUpto > 0) {
+        docWriter.recycleIntBlocks(buffers, 1, bufferUpto-1);
+        Arrays.fill(buffers, 1, bufferUpto, null);
+      }
       bufferUpto = 0;
       intUpto = 0;
       intOffset = 0;
@@ -57,7 +59,7 @@ final class IntBlockPool {
     bufferUpto++;
 
     intUpto = 0;
-    intOffset += DocumentsWriter.INT_BLOCK_SIZE;
+    intOffset += DocumentsWriterPerThread.INT_BLOCK_SIZE;
   }
 }
 

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java Mon May  2 13:50:57 2011
@@ -17,20 +17,22 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
-import java.util.Collection;
-import java.util.Map;
 import java.io.IOException;
+import java.util.Map;
 
 abstract class InvertedDocConsumer {
 
-  /** Add a new thread */
-  abstract InvertedDocConsumerPerThread addThread(DocInverterPerThread docInverterPerThread);
-
   /** Abort (called after hitting AbortException) */
   abstract void abort();
 
   /** Flush a new segment */
-  abstract void flush(Map<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException;
+  abstract void flush(Map<FieldInfo, InvertedDocConsumerPerField> fieldsToFlush, SegmentWriteState state) throws IOException;
+
+  abstract InvertedDocConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo);
+
+  abstract void startDocument() throws IOException;
+
+  abstract void finishDocument() throws IOException;
 
   /** Attempt to free RAM, returning true if any RAM was
    *  freed */

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java Mon May  2 13:50:57 2011
@@ -17,12 +17,13 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
-import java.util.Collection;
-import java.util.Map;
 import java.io.IOException;
+import java.util.Map;
 
 abstract class InvertedDocEndConsumer {
-  abstract InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread);
-  abstract void flush(Map<InvertedDocEndConsumerPerThread,Collection<InvertedDocEndConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException;
+  abstract void flush(Map<FieldInfo, InvertedDocEndConsumerPerField> fieldsToFlush, SegmentWriteState state) throws IOException;
   abstract void abort();
+  abstract InvertedDocEndConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo);
+  abstract void startDocument() throws IOException;
+  abstract void finishDocument() throws IOException;
 }

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Mon May  2 13:50:57 2011
@@ -20,7 +20,6 @@ package org.apache.lucene.index;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
 import java.util.Set;
@@ -72,7 +71,6 @@ public abstract class LogMergePolicy ext
   // out there wrote his own LMP ...
   protected long maxMergeSizeForOptimize = Long.MAX_VALUE;
   protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
-  protected boolean requireContiguousMerge = false;
 
   protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
 
@@ -111,21 +109,6 @@ public abstract class LogMergePolicy ext
       writer.get().message("LMP: " + message);
   }
 
-  /** If true, merges must be in-order slice of the
-   *  segments.  If false, then the merge policy is free to
-   *  pick any segments.  The default is false, which is
-   *  in general more efficient than true since it gives the
-   *  merge policy more freedom to pick closely sized
-   *  segments. */
-  public void setRequireContiguousMerge(boolean v) {
-    requireContiguousMerge = v;
-  }
-
-  /** See {@link #setRequireContiguousMerge}. */
-  public boolean getRequireContiguousMerge() {
-    return requireContiguousMerge;
-  }
-
   /** <p>Returns the number of segments that are merged at
    * once and also controls the total number of segments
    * allowed to accumulate in the index.</p> */
@@ -378,8 +361,6 @@ public abstract class LogMergePolicy ext
       return null;
     }
 
-    // TODO: handle non-contiguous merge case differently?
-    
     // Find the newest (rightmost) segment that needs to
     // be optimized (other segments may have been flushed
     // since optimize started):
@@ -499,14 +480,6 @@ public abstract class LogMergePolicy ext
     }
   }
 
-  private static class SortByIndex implements Comparator<SegmentInfoAndLevel> {
-    public int compare(SegmentInfoAndLevel o1, SegmentInfoAndLevel o2) {
-      return o1.index - o2.index;
-    }
-  }
-
-  private static final SortByIndex sortByIndex = new SortByIndex();
-
   /** Checks if any merges are now necessary and returns a
    *  {@link MergePolicy.MergeSpecification} if so.  A merge
    *  is necessary when there are more than {@link
@@ -532,31 +505,24 @@ public abstract class LogMergePolicy ext
       final SegmentInfo info = infos.info(i);
       long size = size(info);
 
-      // When we require contiguous merge, we still add the
-      // segment to levels to avoid merging "across" a set
-      // of segment being merged:
-      if (!requireContiguousMerge && mergingSegments.contains(info)) {
-        if (verbose()) {
-          message("seg " + info.name + " already being merged; skip");
-        }
-        continue;
-      }
-
       // Floor tiny segments
       if (size < 1) {
         size = 1;
       }
+
       final SegmentInfoAndLevel infoLevel = new SegmentInfoAndLevel(info, (float) Math.log(size)/norm, i);
       levels.add(infoLevel);
+
       if (verbose()) {
-        message("seg " + info.name + " level=" + infoLevel.level + " size=" + size);
+        final long segBytes = sizeBytes(info);
+        String extra = mergingSegments.contains(info) ? " [merging]" : "";
+        if (size >= maxMergeSize) {
+          extra += " [skip: too large]";
+        }
+        message("seg=" + writer.get().segString(info) + " level=" + infoLevel.level + " size=" + String.format("%.3f MB", segBytes/1024/1024.) + extra);
       }
     }
 
-    if (!requireContiguousMerge) {
-      Collections.sort(levels);
-    }
-
     final float levelFloor;
     if (minMergeSize <= 0)
       levelFloor = (float) 0.0;
@@ -614,23 +580,29 @@ public abstract class LogMergePolicy ext
       int end = start + mergeFactor;
       while(end <= 1+upto) {
         boolean anyTooLarge = false;
+        boolean anyMerging = false;
         for(int i=start;i<end;i++) {
           final SegmentInfo info = levels.get(i).info;
           anyTooLarge |= (size(info) >= maxMergeSize || sizeDocs(info) >= maxMergeDocs);
+          if (mergingSegments.contains(info)) {
+            anyMerging = true;
+            break;
+          }
         }
 
-        if (!anyTooLarge) {
+        if (anyMerging) {
+          // skip
+        } else if (!anyTooLarge) {
           if (spec == null)
             spec = new MergeSpecification();
-          if (verbose()) {
-            message("    " + start + " to " + end + ": add this merge");
-          }
-          Collections.sort(levels.subList(start, end), sortByIndex);
           final SegmentInfos mergeInfos = new SegmentInfos();
           for(int i=start;i<end;i++) {
             mergeInfos.add(levels.get(i).info);
             assert infos.contains(levels.get(i).info);
           }
+          if (verbose()) {
+            message("  add merge=" + writer.get().segString(mergeInfos) + " start=" + start + " end=" + end);
+          }
           spec.add(new OneMerge(mergeInfos));
         } else if (verbose()) {
           message("    " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
@@ -682,7 +654,7 @@ public abstract class LogMergePolicy ext
     sb.append("calibrateSizeByDeletes=").append(calibrateSizeByDeletes).append(", ");
     sb.append("maxMergeDocs=").append(maxMergeDocs).append(", ");
     sb.append("useCompoundFile=").append(useCompoundFile).append(", ");
-    sb.append("requireContiguousMerge=").append(requireContiguousMerge);
+    sb.append("noCFSRatio=").append(noCFSRatio);
     sb.append("]");
     return sb.toString();
   }
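
The findMerges changes above drop the requireContiguousMerge option: rather than sorting by index or skipping whole level windows, segments already being merged are now simply excluded from any candidate window, and the verbose output reports per-segment size and merge state. The level bucketing itself is unchanged; it collapses segment sizes logarithmically so that segments within roughly a factor of mergeFactor of each other land in the same level. A minimal restatement, assuming norm is Math.log(mergeFactor) as in the surrounding class:

final class LogLevelSketch {
  // Illustrative only: the level computation used when bucketing segments.
  static float segmentLevel(long sizeInUnits, int mergeFactor) {
    long size = Math.max(1L, sizeInUnits);   // floor tiny segments, as in the hunk above
    double norm = Math.log(mergeFactor);     // assumed to match the class's "norm" value
    // With mergeFactor=10, sizes 200 and 1500 give levels ~2.3 and ~3.2, so they
    // fall into different levels and are not grouped into the same merge window.
    return (float) (Math.log(size) / norm);
  }
}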

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java Mon May  2 13:50:57 2011
@@ -72,6 +72,7 @@ public abstract class MergePolicy implem
     long mergeGen;                  // used by IndexWriter
     boolean isExternal;             // used by IndexWriter
     int maxNumSegmentsOptimize;     // used by IndexWriter
+    long estimatedMergeBytes;       // used by IndexWriter
     List<SegmentReader> readers;        // used by IndexWriter
     List<SegmentReader> readerClones;   // used by IndexWriter
     public final SegmentInfos segments;

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiFields.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiFields.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiFields.java Mon May  2 13:50:57 2011
@@ -51,7 +51,6 @@ public final class MultiFields extends F
   private final Fields[] subs;
   private final ReaderUtil.Slice[] subSlices;
   private final Map<String,Terms> terms = new ConcurrentHashMap<String,Terms>();
-  private final Map<String,DocValues> docValues = new ConcurrentHashMap<String,DocValues>();
 
   /** Returns a single {@link Fields} instance for this
    *  reader, merging fields/terms/docs/positions on the
@@ -193,12 +192,6 @@ public final class MultiFields extends F
     }
   }
   
-  /**  This method may return null if the field does not exist.*/
-  public static DocValues getDocValues(IndexReader r, String field) throws IOException {
-    final Fields fields = getFields(r);
-    return fields == null? null: fields.docValues(field);
-  }
-
   /** Returns {@link DocsEnum} for the specified field &
    *  term.  This may return null if the term does not
    *  exist. */
@@ -283,41 +276,5 @@ public final class MultiFields extends F
     return result;
   }
   
-  @Override
-  public DocValues docValues(String field) throws IOException {
-    DocValues result = docValues.get(field);
-    if (result == null) {
-      // Lazy init: first time this field is requested, we
-      // create & add to docValues:
-      final List<MultiDocValues.DocValuesIndex> docValuesIndex = new ArrayList<MultiDocValues.DocValuesIndex>();
-      int docsUpto = 0;
-      Type type = null;
-      // Gather all sub-readers that share this field
-      for(int i=0;i<subs.length;i++) {
-         DocValues values = subs[i].docValues(field);
-         final int start = subSlices[i].start;
-         final int length = subSlices[i].length;
-        if (values != null) {
-          if (docsUpto != start) {
-            type = values.type();
-            docValuesIndex.add(new MultiDocValues.DocValuesIndex(new MultiDocValues.DummyDocValues(start, type), docsUpto, start - docsUpto));
-          }
-          docValuesIndex.add(new MultiDocValues.DocValuesIndex(values, start, length));
-          docsUpto = start+length;
-
-        } else if (i+1 == subs.length && !docValuesIndex.isEmpty()) {
-          docValuesIndex.add(new MultiDocValues.DocValuesIndex(
-              new MultiDocValues.DummyDocValues(start, type), docsUpto, start
-                  - docsUpto));
-        }
-      }
-      if (docValuesIndex.isEmpty()) {
-        return null;
-      }
-      result = new MultiDocValues(docValuesIndex.toArray(DocValuesIndex.EMPTY_ARRAY));
-      docValues.put(field, result);
-    } 
-    return result; 
-  }
 }
 

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiFieldsEnum.java Mon May  2 13:50:57 2011
@@ -17,12 +17,9 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
-import org.apache.lucene.index.values.DocValues;
 import org.apache.lucene.index.values.MultiDocValues;
-import org.apache.lucene.index.values.Type;
 import org.apache.lucene.util.PriorityQueue;
 import org.apache.lucene.util.ReaderUtil;
-import org.apache.lucene.util.ReaderUtil.Slice;
 
 import java.io.IOException;
 import java.util.List;
@@ -153,37 +150,37 @@ public final  class MultiFieldsEnum exte
     }
   }
 
-  @Override
-  public DocValues docValues() throws IOException {
-    final List<MultiDocValues.DocValuesIndex> docValuesIndex = new ArrayList<MultiDocValues.DocValuesIndex>();
-    int docsUpto = 0;
-    Type type = null;
-    final int numEnums = enumWithSlices.length;
-    for (int i = 0; i < numEnums; i++) {
-      FieldsEnumWithSlice withSlice = enumWithSlices[i];
-      Slice slice = withSlice.slice;
-      final DocValues values = withSlice.fields.docValues();
-      final int start = slice.start;
-      final int length = slice.length;
-      if (values != null && currentField.equals(withSlice.current)) {
-        if (docsUpto != start) {
-          type = values.type();
-          docValuesIndex.add(new MultiDocValues.DocValuesIndex(
-              new MultiDocValues.DummyDocValues(start, type), docsUpto, start
-                  - docsUpto));
-        }
-        docValuesIndex.add(new MultiDocValues.DocValuesIndex(values, start,
-            length));
-        docsUpto = start + length;
-
-      } else if (i + 1 == numEnums && !docValuesIndex.isEmpty()) {
-        docValuesIndex.add(new MultiDocValues.DocValuesIndex(
-            new MultiDocValues.DummyDocValues(start, type), docsUpto, start
-                - docsUpto));
-      }
-    }
-    return docValuesIndex.isEmpty() ? null : docValues.reset(docValuesIndex
-        .toArray(MultiDocValues.DocValuesIndex.EMPTY_ARRAY));
-  }
+//  @Override
+//  public DocValues docValues() throws IOException {
+//    final List<MultiDocValues.DocValuesIndex> docValuesIndex = new ArrayList<MultiDocValues.DocValuesIndex>();
+//    int docsUpto = 0;
+//    Type type = null;
+//    final int numEnums = enumWithSlices.length;
+//    for (int i = 0; i < numEnums; i++) {
+//      FieldsEnumWithSlice withSlice = enumWithSlices[i];
+//      Slice slice = withSlice.slice;
+//      final DocValues values = withSlice.fields.docValues();
+//      final int start = slice.start;
+//      final int length = slice.length;
+//      if (values != null && currentField.equals(withSlice.current)) {
+//        if (docsUpto != start) {
+//          type = values.type();
+//          docValuesIndex.add(new MultiDocValues.DocValuesIndex(
+//              new MultiDocValues.DummyDocValues(start, type), docsUpto, start
+//                  - docsUpto));
+//        }
+//        docValuesIndex.add(new MultiDocValues.DocValuesIndex(values, start,
+//            length));
+//        docsUpto = start + length;
+//
+//      } else if (i + 1 == numEnums && !docValuesIndex.isEmpty()) {
+//        docValuesIndex.add(new MultiDocValues.DocValuesIndex(
+//            new MultiDocValues.DummyDocValues(start, type), docsUpto, start
+//                - docsUpto));
+//      }
+//    }
+//    return docValuesIndex.isEmpty() ? null : docValues.reset(docValuesIndex
+//        .toArray(MultiDocValues.DocValuesIndex.EMPTY_ARRAY));
+//  }
 }
 

Added: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java?rev=1098566&view=auto
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java (added)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiPerDocValues.java Mon May  2 13:50:57 2011
@@ -0,0 +1,148 @@
+package org.apache.lucene.index;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeSet;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.lucene.index.codecs.PerDocValues;
+import org.apache.lucene.index.values.DocValues;
+import org.apache.lucene.index.values.MultiDocValues;
+import org.apache.lucene.index.values.Type;
+import org.apache.lucene.index.values.MultiDocValues.DocValuesIndex;
+import org.apache.lucene.util.ReaderUtil;
+
+/**
+ * 
+ * nocommit - javadoc
+ * @experimental
+ */
+public class MultiPerDocValues extends PerDocValues {
+  private final PerDocValues[] subs;
+  private final ReaderUtil.Slice[] subSlices;
+  private final Map<String, DocValues> docValues = new ConcurrentHashMap<String, DocValues>();
+  private final TreeSet<String> fields;
+
+  public MultiPerDocValues(PerDocValues[] subs, ReaderUtil.Slice[] subSlices) {
+    this.subs = subs;
+    this.subSlices = subSlices;
+    fields = new TreeSet<String>();
+    for (PerDocValues sub : subs) {
+      fields.addAll(sub.fields());
+    }
+  }
+
+  public static PerDocValues getPerDocs(IndexReader r) throws IOException {
+    final IndexReader[] subs = r.getSequentialSubReaders();
+    if (subs == null) {
+      // already an atomic reader
+      return r.perDocValues();
+    } else if (subs.length == 0) {
+      // no fields
+      return null;
+    } else if (subs.length == 1) {
+      return getPerDocs(subs[0]);
+    }
+    PerDocValues perDocValues = r.retrievePerDoc();
+    if (perDocValues == null) {
+
+      final List<PerDocValues> producer = new ArrayList<PerDocValues>();
+      final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();
+
+      new ReaderUtil.Gather(r) {
+        @Override
+        protected void add(int base, IndexReader r) throws IOException {
+          final PerDocValues f = r.perDocValues();
+          if (f != null) {
+            producer.add(f);
+            slices
+                .add(new ReaderUtil.Slice(base, r.maxDoc(), producer.size() - 1));
+          }
+        }
+      }.run();
+
+      if (producer.size() == 0) {
+        return null;
+      } else if (producer.size() == 1) {
+        perDocValues = producer.get(0);
+      } else {
+        perDocValues = new MultiPerDocValues(
+            producer.toArray(PerDocValues.EMPTY_ARRAY),
+            slices.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
+      }
+      r.storePerDoc(perDocValues);
+    }
+    return perDocValues;
+  }
+
+  public DocValues docValues(String field) throws IOException {
+    DocValues result = docValues.get(field);
+    if (result == null) {
+      // Lazy init: first time this field is requested, we
+      // create & add to docValues:
+      final List<MultiDocValues.DocValuesIndex> docValuesIndex = new ArrayList<MultiDocValues.DocValuesIndex>();
+      int docsUpto = 0;
+      Type type = null;
+      // Gather all sub-readers that share this field
+      for (int i = 0; i < subs.length; i++) {
+        DocValues values = subs[i].docValues(field);
+        final int start = subSlices[i].start;
+        final int length = subSlices[i].length;
+        if (values != null) {
+          if (docsUpto != start) {
+            type = values.type();
+            docValuesIndex.add(new MultiDocValues.DocValuesIndex(
+                new MultiDocValues.DummyDocValues(start, type), docsUpto, start
+                    - docsUpto));
+          }
+          docValuesIndex.add(new MultiDocValues.DocValuesIndex(values, start,
+              length));
+          docsUpto = start + length;
+
+        } else if (i + 1 == subs.length && !docValuesIndex.isEmpty()) {
+          docValuesIndex.add(new MultiDocValues.DocValuesIndex(
+              new MultiDocValues.DummyDocValues(start, type), docsUpto, start
+                  - docsUpto));
+        }
+      }
+      if (docValuesIndex.isEmpty()) {
+        return null;
+      }
+      result = new MultiDocValues(
+          docValuesIndex.toArray(DocValuesIndex.EMPTY_ARRAY));
+      docValues.put(field, result);
+    }
+    return result;
+  }
+
+  @Override
+  public void close() throws IOException {
+    PerDocValues[] perDocValues = this.subs;
+    for (PerDocValues values : perDocValues) {
+      values.close();
+    }
+  }
+
+  @Override
+  public Collection<String> fields() {
+    return fields;
+  }
+}
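
MultiPerDocValues mirrors what MultiFields does for terms: getPerDocs either returns the atomic reader's own PerDocValues or gathers the sub-readers' instances into slices, and docValues(String) lazily stitches them together, inserting dummy fillers for readers that lack the field. A hedged usage sketch follows, with the calling class and field name purely illustrative:

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiPerDocValues;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.index.values.DocValues;

final class PerDocValuesLookup {
  // Resolves a (possibly composite) reader to a single PerDocValues view and
  // asks it for one field; returns null if no sub-reader has values for it.
  static DocValues docValuesFor(IndexReader reader, String field) throws IOException {
    PerDocValues perDoc = MultiPerDocValues.getPerDocs(reader);
    return perDoc == null ? null : perDoc.docValues(field);
  }
}

Because MultiPerDocValues caches the stitched result in its ConcurrentHashMap, repeated lookups of the same field reuse the same MultiDocValues instance.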

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiReader.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MultiReader.java Mon May  2 13:50:57 2011
@@ -24,6 +24,7 @@ import java.util.concurrent.ConcurrentHa
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.index.codecs.PerDocValues;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.ReaderUtil;
@@ -403,4 +404,9 @@ public class MultiReader extends IndexRe
       sub.removeReaderFinishedListener(listener);
     }
   }
+
+  @Override
+  public PerDocValues perDocValues() throws IOException {
+    throw new UnsupportedOperationException("please use MultiPerDocValues#getPerDocs, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level PerDocValues");
+  }
 }

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriter.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriter.java Mon May  2 13:50:57 2011
@@ -19,11 +19,7 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 import java.util.Collection;
-import java.util.Iterator;
-import java.util.HashMap;
 import java.util.Map;
-import java.util.List;
-import java.util.ArrayList;
 
 import org.apache.lucene.store.IndexOutput;
 
@@ -36,10 +32,6 @@ import org.apache.lucene.store.IndexOutp
 
 final class NormsWriter extends InvertedDocEndConsumer {
 
-  @Override
-  public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) {
-    return new NormsWriterPerThread(docInverterPerThread, this);
-  }
 
   @Override
   public void abort() {}
@@ -50,40 +42,11 @@ final class NormsWriter extends Inverted
   /** Produce _X.nrm if any document had a field with norms
    *  not disabled */
   @Override
-  public void flush(Map<InvertedDocEndConsumerPerThread,Collection<InvertedDocEndConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException {
-
-    final Map<FieldInfo,List<NormsWriterPerField>> byField = new HashMap<FieldInfo,List<NormsWriterPerField>>();
-
+  public void flush(Map<FieldInfo,InvertedDocEndConsumerPerField> fieldsToFlush, SegmentWriteState state) throws IOException {
     if (!state.fieldInfos.hasNorms()) {
       return;
     }
 
-    // Typically, each thread will have encountered the same
-    // field.  So first we collate by field, ie, all
-    // per-thread field instances that correspond to the
-    // same FieldInfo
-    for (final Map.Entry<InvertedDocEndConsumerPerThread,Collection<InvertedDocEndConsumerPerField>> entry : threadsAndFields.entrySet()) {
-      final Collection<InvertedDocEndConsumerPerField> fields = entry.getValue();
-      final Iterator<InvertedDocEndConsumerPerField> fieldsIt = fields.iterator();
-
-      while (fieldsIt.hasNext()) {
-        final NormsWriterPerField perField = (NormsWriterPerField) fieldsIt.next();
-
-        if (perField.upto > 0) {
-          // It has some norms
-          List<NormsWriterPerField> l = byField.get(perField.fieldInfo);
-          if (l == null) {
-            l = new ArrayList<NormsWriterPerField>();
-            byField.put(perField.fieldInfo, l);
-          }
-          l.add(perField);
-        } else
-          // Remove this field since we haven't seen it
-          // since the previous flush
-          fieldsIt.remove();
-      }
-    }
-
     final String normsFileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.NORMS_EXTENSION);
     IndexOutput normsOut = state.directory.createOutput(normsFileName);
 
@@ -93,60 +56,25 @@ final class NormsWriter extends Inverted
       int normCount = 0;
 
       for (FieldInfo fi : state.fieldInfos) {
-        final List<NormsWriterPerField> toMerge = byField.get(fi);
+        final NormsWriterPerField toWrite = (NormsWriterPerField) fieldsToFlush.get(fi);
         int upto = 0;
-        if (toMerge != null) {
-
-          final int numFields = toMerge.size();
-
+        if (toWrite != null && toWrite.upto > 0) {
           normCount++;
 
-          final NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
-          int[] uptos = new int[numFields];
-
-          for(int j=0;j<numFields;j++)
-            fields[j] = toMerge.get(j);
-
-          int numLeft = numFields;
-              
-          while(numLeft > 0) {
-
-            assert uptos[0] < fields[0].docIDs.length : " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.length);
-
-            int minLoc = 0;
-            int minDocID = fields[0].docIDs[uptos[0]];
-
-            for(int j=1;j<numLeft;j++) {
-              final int docID = fields[j].docIDs[uptos[j]];
-              if (docID < minDocID) {
-                minDocID = docID;
-                minLoc = j;
-              }
-            }
-
-            assert minDocID < state.numDocs;
-
-            // Fill hole
-            for(;upto<minDocID;upto++)
+          int docID = 0;
+          for (; docID < state.numDocs; docID++) {
+            if (upto < toWrite.upto && toWrite.docIDs[upto] == docID) {
+              normsOut.writeByte(toWrite.norms[upto]);
+              upto++;
+            } else {
               normsOut.writeByte((byte) 0);
-
-            normsOut.writeByte(fields[minLoc].norms[uptos[minLoc]]);
-            (uptos[minLoc])++;
-            upto++;
-
-            if (uptos[minLoc] == fields[minLoc].upto) {
-              fields[minLoc].reset();
-              if (minLoc != numLeft-1) {
-                fields[minLoc] = fields[numLeft-1];
-                uptos[minLoc] = uptos[numLeft-1];
-              }
-              numLeft--;
             }
           }
-          
-          // Fill final hole with defaultNorm
-          for(;upto<state.numDocs;upto++)
-            normsOut.writeByte((byte) 0);
+
+          // we should have consumed every norm
+          assert upto == toWrite.upto;
+
+          toWrite.reset();
         } else if (fi.isIndexed && !fi.omitNorms) {
           normCount++;
           // Fill entire field with default norm:
@@ -161,4 +89,16 @@ final class NormsWriter extends Inverted
       normsOut.close();
     }
   }
+
+  @Override
+  void finishDocument() throws IOException {}
+
+  @Override
+  void startDocument() throws IOException {}
+
+  @Override
+  InvertedDocEndConsumerPerField addField(DocInverterPerField docInverterPerField,
+      FieldInfo fieldInfo) {
+    return new NormsWriterPerField(docInverterPerField, fieldInfo);
+  }
 }

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java Mon May  2 13:50:57 2011
@@ -27,9 +27,8 @@ import org.apache.lucene.util.ArrayUtil;
 
 final class NormsWriterPerField extends InvertedDocEndConsumerPerField implements Comparable<NormsWriterPerField> {
 
-  final NormsWriterPerThread perThread;
   final FieldInfo fieldInfo;
-  final DocumentsWriter.DocState docState;
+  final DocumentsWriterPerThread.DocState docState;
   final Similarity similarity;
   
   // Holds all docID/norm pairs we've seen
@@ -46,10 +45,9 @@ final class NormsWriterPerField extends 
     upto = 0;
   }
 
-  public NormsWriterPerField(final DocInverterPerField docInverterPerField, final NormsWriterPerThread perThread, final FieldInfo fieldInfo) {
-    this.perThread = perThread;
+  public NormsWriterPerField(final DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) {
     this.fieldInfo = fieldInfo;
-    docState = perThread.docState;
+    docState = docInverterPerField.docState;
     fieldState = docInverterPerField.fieldState;
     similarity = docState.similarityProvider.get(fieldInfo.name);
   }

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/ParallelReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/ParallelReader.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/ParallelReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/ParallelReader.java Mon May  2 13:50:57 2011
@@ -21,9 +21,8 @@ import org.apache.lucene.document.Docume
 import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.document.FieldSelectorResult;
 import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.index.values.DocValues;
+import org.apache.lucene.index.codecs.PerDocValues;
 import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.Pair;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.MapBackedSet;
 
@@ -183,21 +182,15 @@ public class ParallelReader extends Inde
       }
     }
 
-    @Override
-    public DocValues docValues() throws IOException {
-      assert currentReader != null;
-      return MultiFields.getDocValues(currentReader, currentField);
-    }
   }
 
   // Single instance of this, per ParallelReader instance
   private class ParallelFields extends Fields {
-    final HashMap<String,Pair<Terms, DocValues>> fields = new HashMap<String,Pair<Terms, DocValues>>();
+    final HashMap<String,Terms> fields = new HashMap<String,Terms>();
 
     public void addField(String field, IndexReader r) throws IOException {
       Fields multiFields = MultiFields.getFields(r);
-      fields.put(field, new Pair<Terms, DocValues>( multiFields.terms(field),
-          multiFields.docValues(field)));
+      fields.put(field, multiFields.terms(field));
     }
 
     @Override
@@ -206,12 +199,7 @@ public class ParallelReader extends Inde
     }
     @Override
     public Terms terms(String field) throws IOException {
-      return fields.get(field).cur;
-    }
-
-    @Override
-    public DocValues docValues(String field) throws IOException {
-      return fields.get(field).cud;
+      return fields.get(field);
     }
   }
   
@@ -578,6 +566,12 @@ public class ParallelReader extends Inde
       reader.removeReaderFinishedListener(listener);
     }
   }
+
+  @Override
+  public PerDocValues perDocValues() throws IOException {
+    // TODO Auto-generated method stub
+    return null;
+  }
 }
 
 

Added: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerDocWriteState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerDocWriteState.java?rev=1098566&view=auto
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerDocWriteState.java (added)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerDocWriteState.java Mon May  2 13:50:57 2011
@@ -0,0 +1,77 @@
+package org.apache.lucene.index;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.PrintStream;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.lucene.store.Directory;
+
+/**
+ * nocommit - javadoc: holds the per-segment state a codec's PerDocConsumer needs to write per-document values.
+ * @lucene.experimental
+ */
+public class PerDocWriteState {
+  public final PrintStream infoStream;
+  public final Directory directory;
+  public final String segmentName;
+  public final FieldInfos fieldInfos;
+  public final AtomicLong bytesUsed;
+  public final SegmentCodecs segmentCodecs;
+  public final int codecId;
+
+  /** Expert: The interval between indexed terms in the terms dictionary; every
+   * Nth term is held in RAM.  Smaller values use more memory but make term
+   * lookups slightly faster, while larger values use less memory and make
+   * lookups slightly slower.  Searching is typically not dominated by
+   * dictionary lookup, so tweaking this is rarely useful.*/
+  public int termIndexInterval;                   // TODO: this should be private to the codec, not settable here or in IWC
+
+  public PerDocWriteState(PrintStream infoStream, Directory directory, String segmentName, FieldInfos fieldInfos, AtomicLong bytesUsed, int codecId) {
+    this.infoStream = infoStream;
+    this.directory = directory;
+    this.segmentName = segmentName;
+    this.fieldInfos = fieldInfos;
+    this.segmentCodecs = fieldInfos.buildSegmentCodecs(false);
+    this.codecId = codecId;
+    this.bytesUsed = bytesUsed;
+  }
+  
+  public PerDocWriteState(SegmentWriteState state) {
+    infoStream = state.infoStream;
+    directory = state.directory;
+    segmentCodecs = state.segmentCodecs;
+    segmentName = state.segmentName;
+    fieldInfos = state.fieldInfos;
+    codecId = state.codecId;
+    bytesUsed = new AtomicLong(0);
+  }
+  
+  public PerDocWriteState(PerDocWriteState state, int codecId) {
+    this.infoStream = state.infoStream;
+    this.directory = state.directory;
+    this.segmentName = state.segmentName;
+    this.fieldInfos = state.fieldInfos;
+    this.segmentCodecs = state.segmentCodecs;
+    this.codecId = codecId;
+    this.bytesUsed = state.bytesUsed;
+  }
+
+  public String codecIdAsString() {
+    return "" + codecId;
+  }
+}
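
A minimal sketch (not part of this patch) of how the three constructors above are meant to chain, mirroring their use in PerFieldCodecWrapper further down: the segment-level state is wrapped once from a SegmentWriteState, then re-wrapped per codec so each codec writes under its own codec id while sharing the same bytesUsed counter. segmentWriteState and segmentCodecs are hypothetical locals here, and the fragment assumes the surrounding imports for PerDocConsumer:

    // Sketch only; mirrors PerFieldCodecWrapper.PerDocConsumers below.
    PerDocWriteState perSegment = new PerDocWriteState(segmentWriteState); // fresh bytesUsed counter
    for (int i = 0; i < segmentCodecs.codecs.length; i++) {
      // Per-codec view: same directory/segment/fieldInfos/bytesUsed, codec-specific id.
      PerDocConsumer consumer =
          segmentCodecs.codecs[i].docsConsumer(new PerDocWriteState(perSegment, i));
      // ... hand fields with doc values to 'consumer', then close it ...
    }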

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java Mon May  2 13:50:57 2011
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
@@ -28,6 +29,8 @@ import java.util.TreeSet;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.FieldsConsumer;
 import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.codecs.PerDocConsumer;
+import org.apache.lucene.index.codecs.PerDocValues;
 import org.apache.lucene.index.codecs.TermsConsumer;
 import org.apache.lucene.index.codecs.docvalues.DocValuesConsumer;
 import org.apache.lucene.index.values.DocValues;
@@ -75,12 +78,6 @@ final class PerFieldCodecWrapper extends
     }
 
     @Override
-    public DocValuesConsumer addValuesField(FieldInfo field) throws IOException {
-      final FieldsConsumer fields = consumers.get(field.getCodecId());
-      return fields.addValuesField(field);
-    }
-
-    @Override
     public void close() throws IOException {
       Iterator<FieldsConsumer> it = consumers.iterator();
       IOException err = null;
@@ -113,7 +110,7 @@ final class PerFieldCodecWrapper extends
       boolean success = false;
       try {
         for (FieldInfo fi : fieldInfos) {
-          if (fi.isIndexed || fi.hasDocValues()) { // TODO this does not work for non-indexed fields
+          if (fi.isIndexed) {
             fields.add(fi.name);
             assert fi.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID;
             Codec codec = segmentCodecs.codecs[fi.getCodecId()];
@@ -171,11 +168,6 @@ final class PerFieldCodecWrapper extends
           return TermsEnum.EMPTY;
         }
       }
-
-      @Override
-      public DocValues docValues() throws IOException {
-        return codecs.get(current).docValues(current);
-      }
     }
 
     @Override
@@ -190,12 +182,6 @@ final class PerFieldCodecWrapper extends
     }
     
     @Override
-    public DocValues docValues(String field) throws IOException {
-      FieldsProducer fieldsProducer = codecs.get(field);
-      return fieldsProducer == null? null: fieldsProducer.docValues(field);
-    }
-
-    @Override
     public void close() throws IOException {
       Iterator<FieldsProducer> it = codecs.values().iterator();
       IOException err = null;
@@ -244,4 +230,133 @@ final class PerFieldCodecWrapper extends
       codec.getExtensions(extensions);
     }
   }
+
+  @Override
+  public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
+    return new PerDocConsumers(state);
+  }
+
+  @Override
+  public PerDocValues docsProducer(SegmentReadState state) throws IOException {
+    return new PerDocProducers(state.dir, state.fieldInfos, state.segmentInfo,
+        state.readBufferSize, state.termsIndexDivisor);
+  }
+  
+  private final class PerDocProducers extends PerDocValues {
+    private final Set<String> fields = new TreeSet<String>();
+    private final Map<String, PerDocValues> codecs = new HashMap<String, PerDocValues>();
+
+    public PerDocProducers(Directory dir, FieldInfos fieldInfos, SegmentInfo si,
+        int readBufferSize, int indexDivisor) throws IOException {
+      final Map<Codec, PerDocValues> producers = new HashMap<Codec, PerDocValues>();
+      boolean success = false;
+      try {
+        for (FieldInfo fi : fieldInfos) {
+          if (fi.hasDocValues()) {
+            fields.add(fi.name);
+            assert fi.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID;
+            Codec codec = segmentCodecs.codecs[fi.getCodecId()];
+            if (!producers.containsKey(codec)) {
+              producers.put(codec, codec.docsProducer(new SegmentReadState(dir,
+                si, fieldInfos, readBufferSize, indexDivisor, fi.getCodecId())));
+            }
+            codecs.put(fi.name, producers.get(codec));
+          }
+        }
+        success = true;
+      } finally {
+        if (!success) {
+          // If we hit exception (eg, IOE because writer was
+          // committing, or, for any other reason) we must
+          // go back and close all FieldsProducers we opened:
+          for(PerDocValues producer : producers.values()) {
+            try {
+              producer.close();
+            } catch (Throwable t) {
+              // Suppress all exceptions here so we continue
+              // to throw the original one
+            }
+          }
+        }
+      }
+    }
+    @Override
+    public Collection<String> fields() {
+      return fields;
+    }
+    @Override
+    public DocValues docValues(String field) throws IOException {
+      final PerDocValues perDocProducer = codecs.get(field);
+      if (perDocProducer == null) {
+        return null;
+      }
+      return perDocProducer.docValues(field);
+    }
+    
+    @Override
+    public void close() throws IOException {
+      final Iterator<PerDocValues> it = codecs.values().iterator();
+      IOException err = null;
+      while (it.hasNext()) {
+        try {
+          it.next().close();
+        } catch (IOException ioe) {
+          // keep first IOException we hit but keep
+          // closing the rest
+          if (err == null) {
+            err = ioe;
+          }
+        }
+      }
+      if (err != null) {
+        throw err;
+      }
+    }
+  }
+  
+  private final class PerDocConsumers extends PerDocConsumer {
+    private final ArrayList<PerDocConsumer> consumers = new ArrayList<PerDocConsumer>();
+
+    public PerDocConsumers(PerDocWriteState state) throws IOException {
+      assert segmentCodecs == state.segmentCodecs;
+      final Codec[] codecs = segmentCodecs.codecs;
+      for (int i = 0; i < codecs.length; i++) {
+        consumers.add(codecs[i].docsConsumer(new PerDocWriteState(state, i)));
+      }
+    }
+
+    @Override
+    public void close() throws IOException {
+      Iterator<PerDocConsumer> it = consumers.iterator();
+      IOException err = null;
+      while (it.hasNext()) {
+        try {
+          PerDocConsumer next = it.next();
+          if (next != null) {
+            next.close();
+          }
+        } catch (IOException ioe) {
+          // keep first IOException we hit but keep
+          // closing the rest
+          if (err == null) {
+            err = ioe;
+          }
+        }
+      }
+      if (err != null) {
+        throw err;
+      }
+    }
+
+    @Override
+    public DocValuesConsumer addValuesField(FieldInfo field) throws IOException {
+      assert field.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID;
+      final PerDocConsumer perDoc = consumers.get(field.getCodecId());
+      if (perDoc == null) {
+        return null;
+      }
+      return perDoc.addValuesField(field);
+    }
+    
+  }
 }
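
The two close() methods above follow the same idiom: close every sub-producer or sub-consumer, remember only the first IOException, and rethrow it once everything has been attempted. A generic sketch of that pattern (not part of this patch; Closeable stands in for PerDocValues / PerDocConsumer):

    import java.io.Closeable;
    import java.io.IOException;

    final class CloseAllSketch {
      // Close everything, keep the first IOException, rethrow it at the end.
      static void closeAll(Iterable<? extends Closeable> items) throws IOException {
        IOException first = null;
        for (Closeable c : items) {
          try {
            if (c != null) {
              c.close();
            }
          } catch (IOException ioe) {
            if (first == null) {
              first = ioe; // remember the first failure but keep closing the rest
            }
          }
        }
        if (first != null) {
          throw first;
        }
      }
    }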

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentInfo.java Mon May  2 13:50:57 2011
@@ -39,14 +39,14 @@ import org.apache.lucene.util.Constants;
 /**
  * Information about a segment such as its name, directory, and files related
  * to the segment.
- * 
+ *
  * @lucene.experimental
  */
 public final class SegmentInfo {
 
   static final int NO = -1;          // e.g. no norms; no deletes;
   static final int YES = 1;          // e.g. have norms; have deletes;
-  static final int WITHOUT_GEN = 0;  // a file name that has no GEN in it. 
+  static final int WITHOUT_GEN = 0;  // a file name that has no GEN in it.
 
   public String name;				  // unique name in dir
   public int docCount;				  // number of docs in seg
@@ -58,7 +58,7 @@ public final class SegmentInfo {
    * - YES or higher if there are deletes at generation N
    */
   private long delGen;
-  
+
   /*
    * Current generation of each field's norm file. If this array is null,
    * means no separate norms. If this array is not null, its values mean:
@@ -67,7 +67,7 @@ public final class SegmentInfo {
    */
   private Map<Integer,Long> normGen;
 
-  private boolean isCompoundFile;         
+  private boolean isCompoundFile;
 
   private volatile List<String> files;                     // cached list of files that this segment uses
                                                   // in the Directory
@@ -75,10 +75,13 @@ public final class SegmentInfo {
   private volatile long sizeInBytesNoStore = -1;           // total byte size of all but the store files (computed on demand)
   private volatile long sizeInBytesWithStore = -1;         // total byte size of all of our files (computed on demand)
 
+  //TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
   private int docStoreOffset;                     // if this segment shares stored fields & vectors, this
                                                   // offset is where in that file this segment's docs begin
+  //TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
   private String docStoreSegment;                 // name used to derive fields/vectors file we share with
                                                   // other segments
+  //TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
   private boolean docStoreIsCompoundFile;         // whether doc store files are stored in compound file (*.cfx)
 
   private int delCount;                           // How many deleted docs in this segment
@@ -93,9 +96,9 @@ public final class SegmentInfo {
 
   private Map<String,String> diagnostics;
 
-  // Tracks the Lucene version this segment was created with, since 3.1. Null 
+  // Tracks the Lucene version this segment was created with, since 3.1. Null
   // indicates an older than 3.0 index, and it's used to detect a too old index.
-  // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and 
+  // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and
   // specific versions afterwards ("3.0", "3.1" etc.).
   // see Constants.LUCENE_MAIN_VERSION.
   private String version;
@@ -103,7 +106,7 @@ public final class SegmentInfo {
   // NOTE: only used in-RAM by IW to track buffered deletes;
   // this is never written to/read from the Directory
   private long bufferedDeletesGen;
-  
+
   public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile,
                      boolean hasProx, SegmentCodecs segmentCodecs, boolean hasVectors, FieldInfos fieldInfos) {
     this.name = name;
@@ -184,11 +187,13 @@ public final class SegmentInfo {
       docStoreSegment = name;
       docStoreIsCompoundFile = false;
     }
+
     if (format > DefaultSegmentInfosWriter.FORMAT_4_0) {
       // pre-4.0 indexes write a byte if there is a single norms file
       byte b = input.readByte();
       assert 1 == b;
     }
+
     int numNormGen = input.readInt();
     if (numNormGen == NO) {
       normGen = null;
@@ -209,7 +214,7 @@ public final class SegmentInfo {
     assert delCount <= docCount;
 
     hasProx = input.readByte() == YES;
-    
+
     // System.out.println(Thread.currentThread().getName() + ": si.read hasProx=" + hasProx + " seg=" + name);
     if (format <= DefaultSegmentInfosWriter.FORMAT_4_0) {
       segmentCodecs = new SegmentCodecs(codecs, input);
@@ -219,7 +224,7 @@ public final class SegmentInfo {
       segmentCodecs = new SegmentCodecs(codecs, new Codec[] { codecs.lookup("PreFlex")});
     }
     diagnostics = input.readStringStringMap();
-    
+
     if (format <= DefaultSegmentInfosWriter.FORMAT_HAS_VECTORS) {
       hasVectors = input.readByte() == 1;
     } else {
@@ -368,7 +373,7 @@ public final class SegmentInfo {
       // against this segment
       return null;
     } else {
-      return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen); 
+      return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen);
     }
   }
 
@@ -434,7 +439,7 @@ public final class SegmentInfo {
     if (hasSeparateNorms(number)) {
       return IndexFileNames.fileNameFromGeneration(name, "s" + number, normGen.get(number));
     } else {
-      // single file for all norms 
+      // single file for all norms
       return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.NORMS_EXTENSION, WITHOUT_GEN);
     }
   }
@@ -467,39 +472,74 @@ public final class SegmentInfo {
     assert delCount <= docCount;
   }
 
+  /**
+   * @deprecated shared doc stores are not supported in >= 4.0
+   */
+  @Deprecated
   public int getDocStoreOffset() {
+    // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
     return docStoreOffset;
   }
-  
+
+  /**
+   * @deprecated shared doc stores are not supported in >= 4.0
+   */
+  @Deprecated
   public boolean getDocStoreIsCompoundFile() {
+    // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
     return docStoreIsCompoundFile;
   }
-  
-  void setDocStoreIsCompoundFile(boolean v) {
-    docStoreIsCompoundFile = v;
+
+  /**
+   * @deprecated shared doc stores are not supported in >= 4.0
+   */
+  @Deprecated
+  public void setDocStoreIsCompoundFile(boolean docStoreIsCompoundFile) {
+    // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
+    this.docStoreIsCompoundFile = docStoreIsCompoundFile;
     clearFilesCache();
   }
-  
+
+  /**
+   * @deprecated shared doc stores are not supported in >= 4.0
+   */
+  @Deprecated
+  void setDocStore(int offset, String segment, boolean isCompoundFile) {
+    // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
+    docStoreOffset = offset;
+    docStoreSegment = segment;
+    docStoreIsCompoundFile = isCompoundFile;
+    clearFilesCache();
+  }
+
+  /**
+   * @deprecated shared doc stores are not supported in >= 4.0
+   */
+  @Deprecated
   public String getDocStoreSegment() {
+    // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
     return docStoreSegment;
   }
-  
-  public void setDocStoreSegment(String segment) {
-    docStoreSegment = segment;
-  }
-  
+
+  /**
+   * @deprecated shared doc stores are not supported in >= 4.0
+   */
+  @Deprecated
   void setDocStoreOffset(int offset) {
+    // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
     docStoreOffset = offset;
     clearFilesCache();
   }
 
-  void setDocStore(int offset, String segment, boolean isCompoundFile) {        
-    docStoreOffset = offset;
-    docStoreSegment = segment;
-    docStoreIsCompoundFile = isCompoundFile;
-    clearFilesCache();
+  /**
+   * @deprecated shared doc stores are not supported in >= 4.0
+   */
+  @Deprecated
+  public void setDocStoreSegment(String docStoreSegment) {
+    // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
+    this.docStoreSegment = docStoreSegment;
   }
-  
+
   /** Save this segment's info. */
   public void write(IndexOutput output)
     throws IOException {
@@ -509,12 +549,14 @@ public final class SegmentInfo {
     output.writeString(name);
     output.writeInt(docCount);
     output.writeLong(delGen);
+
     output.writeInt(docStoreOffset);
     if (docStoreOffset != -1) {
       output.writeString(docStoreSegment);
       output.writeByte((byte) (docStoreIsCompoundFile ? 1:0));
     }
 
+
     if (normGen == null) {
       output.writeInt(NO);
     } else {
@@ -524,7 +566,7 @@ public final class SegmentInfo {
         output.writeLong(entry.getValue());
       }
     }
-    
+
     output.writeByte((byte) (isCompoundFile ? YES : NO));
     output.writeInt(delCount);
     output.writeByte((byte) (hasProx ? 1:0));
@@ -572,9 +614,9 @@ public final class SegmentInfo {
       // Already cached:
       return files;
     }
-    
+
     Set<String> fileSet = new HashSet<String>();
-    
+
     boolean useCompoundFile = getUseCompoundFile();
 
     if (useCompoundFile) {
@@ -608,7 +650,7 @@ public final class SegmentInfo {
         fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
         fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
         fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
-      }      
+      }
     }
 
     String delFileName = IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen);
@@ -646,7 +688,7 @@ public final class SegmentInfo {
   }
 
   /** Used for debugging.  Format may suddenly change.
-   * 
+   *
    *  <p>Current format looks like
    *  <code>_a(3.1):c45/4->_1</code>, which means the segment's
    *  name is <code>_a</code>; it was created with Lucene 3.1 (or
@@ -661,7 +703,6 @@ public final class SegmentInfo {
 
     StringBuilder s = new StringBuilder();
     s.append(name).append('(').append(version == null ? "?" : version).append(')').append(':');
-
     char cfs = getUseCompoundFile() ? 'c' : 'C';
     s.append(cfs);
 
@@ -677,7 +718,7 @@ public final class SegmentInfo {
     if (delCount != 0) {
       s.append('/').append(delCount);
     }
-    
+
     if (docStoreOffset != -1) {
       s.append("->").append(docStoreSegment);
       if (docStoreIsCompoundFile) {
@@ -717,13 +758,13 @@ public final class SegmentInfo {
    * <b>NOTE:</b> this method is used for internal purposes only - you should
    * not modify the version of a SegmentInfo, or it may result in unexpected
    * exceptions thrown when you attempt to open the index.
-   * 
+   *
    * @lucene.internal
    */
   public void setVersion(String version) {
     this.version = version;
   }
-  
+
   /** Returns the version of the code which wrote the segment. */
   public String getVersion() {
     return version;

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1098566&r1=1098565&r2=1098566&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentMerger.java Mon May  2 13:50:57 2011
@@ -22,7 +22,6 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
-import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader.FieldOption;
@@ -31,6 +30,8 @@ import org.apache.lucene.index.codecs.Co
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.index.codecs.FieldsConsumer;
 import org.apache.lucene.index.codecs.MergeState;
+import org.apache.lucene.index.codecs.PerDocConsumer;
+import org.apache.lucene.index.codecs.PerDocValues;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
@@ -40,24 +41,24 @@ import org.apache.lucene.util.ReaderUtil
 
 /**
  * The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add}),
- * into a single Segment.  After adding the appropriate readers, call the merge method to combine the 
+ * into a single Segment.  After adding the appropriate readers, call the merge method to combine the
  * segments.
- * 
+ *
  * @see #merge
  * @see #add
  */
 final class SegmentMerger {
-  
+
   /** norms header placeholder */
-  static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1}; 
-  
+  static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1};
+
   private Directory directory;
   private String segment;
   private int termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;
 
   private List<IndexReader> readers = new ArrayList<IndexReader>();
   private final FieldInfos fieldInfos;
-  
+
   private int mergedDocs;
 
   private final MergeState.CheckAbort checkAbort;
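
For orientation only (not part of this patch), a rough sketch of the add-then-merge flow the class javadoc above describes. SegmentMerger is package-private, so this is illustrative rather than a public recipe; the variable names are hypothetical, passing null for the OneMerge and PayloadProcessorProvider is an assumption, and merge() returning the merged document count is also an assumption:

    // Rough usage sketch; constructor arguments follow the signature visible in this diff.
    static int mergeSketch(Directory dir, String segmentName, CodecProvider codecs,
                           FieldInfos fieldInfos, List<IndexReader> readers) throws IOException {
      SegmentMerger merger = new SegmentMerger(dir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL,
                                               segmentName, null, codecs, null, fieldInfos);
      for (IndexReader reader : readers) {
        merger.add(reader);          // register each source segment's reader
      }
      return merger.merge();         // combine them into the new segment
    }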
@@ -65,12 +66,12 @@ final class SegmentMerger {
   /** Maximum number of contiguous documents to bulk-copy
       when merging stored fields */
   private final static int MAX_RAW_MERGE_DOCS = 4192;
-  
+
   private Codec codec;
   private SegmentWriteState segmentWriteState;
 
   private PayloadProcessorProvider payloadProcessorProvider;
-  
+
   SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, CodecProvider codecs, PayloadProcessorProvider payloadProcessorProvider, FieldInfos fieldInfos) {
     this.payloadProcessorProvider = payloadProcessorProvider;
     directory = dir;
@@ -133,10 +134,10 @@ final class SegmentMerger {
     for (String file : files) {
       cfsWriter.addFile(file);
     }
-    
+
     // Perform the merge
     cfsWriter.close();
-   
+
     return files;
   }
 
@@ -194,13 +195,12 @@ final class SegmentMerger {
   }
 
   /**
-   * 
+   *
    * @return The number of documents in all of the readers
    * @throws CorruptIndexException if the index is corrupt
    * @throws IOException if there is a low-level IO error
    */
   private int mergeFields() throws CorruptIndexException, IOException {
-
     for (IndexReader reader : readers) {
       if (reader instanceof SegmentReader) {
         SegmentReader segmentReader = (SegmentReader) reader;
@@ -263,8 +263,8 @@ final class SegmentMerger {
       // details.
       throw new RuntimeException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.fileExists(fileName) + "; now aborting this merge to prevent index corruption");
 
-    segmentWriteState = new SegmentWriteState(null, directory, segment, fieldInfos, docCount, termIndexInterval, codecInfo, null, new AtomicLong(0));
-    
+    segmentWriteState = new SegmentWriteState(null, directory, segment, fieldInfos, docCount, termIndexInterval, codecInfo, null);
+
     return docCount;
   }
 
@@ -282,7 +282,7 @@ final class SegmentMerger {
           ++j;
           continue;
         }
-        // We can optimize this case (doing a bulk byte copy) since the field 
+        // We can optimize this case (doing a bulk byte copy) since the field
         // numbers are identical
         int start = j, numDocs = 0;
         do {
@@ -294,7 +294,7 @@ final class SegmentMerger {
             break;
           }
         } while(numDocs < MAX_RAW_MERGE_DOCS);
-        
+
         IndexInput stream = matchingFieldsReader.rawDocs(rawDocLengths, start, numDocs);
         fieldsWriter.addRawDocuments(stream, rawDocLengths, numDocs);
         docCount += numDocs;
@@ -348,7 +348,7 @@ final class SegmentMerger {
    * @throws IOException
    */
   private final void mergeVectors() throws IOException {
-    TermVectorsWriter termVectorsWriter = 
+    TermVectorsWriter termVectorsWriter =
       new TermVectorsWriter(directory, segment, fieldInfos);
 
     try {
@@ -368,7 +368,7 @@ final class SegmentMerger {
           copyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, reader);
         } else {
           copyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, reader);
-          
+
         }
       }
     } finally {
@@ -401,7 +401,7 @@ final class SegmentMerger {
           ++docNum;
           continue;
         }
-        // We can optimize this case (doing a bulk byte copy) since the field 
+        // We can optimize this case (doing a bulk byte copy) since the field
         // numbers are identical
         int start = docNum, numDocs = 0;
         do {
@@ -413,7 +413,7 @@ final class SegmentMerger {
             break;
           }
         } while(numDocs < MAX_RAW_MERGE_DOCS);
-        
+
         matchingVectorsReader.rawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
         termVectorsWriter.addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
         checkAbort.work(300 * numDocs);
@@ -424,7 +424,7 @@ final class SegmentMerger {
           // skip deleted docs
           continue;
         }
-        
+
         // NOTE: it's very important to first assign to vectors then pass it to
         // termVectorsWriter.addAllDocVectors; see LUCENE-1282
         TermFreqVector[] vectors = reader.getTermFreqVectors(docNum);
@@ -433,7 +433,7 @@ final class SegmentMerger {
       }
     }
   }
-  
+
   private void copyVectorsNoDeletions(final TermVectorsWriter termVectorsWriter,
                                       final TermVectorsReader matchingVectorsReader,
                                       final IndexReader reader)
@@ -469,13 +469,20 @@ final class SegmentMerger {
 
     // Let CodecProvider decide which codec will be used to write
     // the new segment:
-    
+
     int docBase = 0;
 
     final List<Fields> fields = new ArrayList<Fields>();
+
     final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();
     final List<Bits> bits = new ArrayList<Bits>();
     final List<Integer> bitsStarts = new ArrayList<Integer>();
+
+    // TODO: move this into its own method - currently this only merges doc values
+    final List<PerDocValues> perDocProducers = new ArrayList<PerDocValues>();
+    final List<ReaderUtil.Slice> perDocSlices = new ArrayList<ReaderUtil.Slice>();
+    final List<Bits> perDocBits = new ArrayList<Bits>();
+    final List<Integer> perDocBitsStarts = new ArrayList<Integer>();
 
     for(IndexReader r : readers) {
       final Fields f = r.fields();
@@ -486,10 +493,18 @@ final class SegmentMerger {
         bits.add(r.getDeletedDocs());
         bitsStarts.add(docBase);
       }
+      final PerDocValues producer = r.perDocValues();
+      if (producer != null) {
+        perDocSlices.add(new ReaderUtil.Slice(docBase, maxDoc, fields.size()));
+        perDocProducers.add(producer);
+        perDocBits.add(r.getDeletedDocs());
+        perDocBitsStarts.add(docBase);
+      }
       docBase += maxDoc;
     }
 
     bitsStarts.add(docBase);
+    perDocBitsStarts.add(docBase);
 
     // we may gather more readers than mergeState.readerCount
     mergeState = new MergeState();
@@ -497,7 +512,7 @@ final class SegmentMerger {
     mergeState.readerCount = readers.size();
     mergeState.fieldInfos = fieldInfos;
     mergeState.mergedDocCount = mergedDocs;
-    
+
     // Remap docIDs
     mergeState.delCounts = new int[mergeState.readerCount];
     mergeState.docMaps = new int[mergeState.readerCount][];
@@ -535,7 +550,7 @@ final class SegmentMerger {
         }
         assert delCount == mergeState.delCounts[i]: "reader delCount=" + mergeState.delCounts[i] + " vs recomputed delCount=" + delCount;
       }
-      
+
       if (payloadProcessorProvider != null) {
         mergeState.dirPayloadProcessor[i] = payloadProcessorProvider.getDirProcessor(reader.directory());
       }
@@ -548,7 +563,7 @@ final class SegmentMerger {
     // apart when we step through the docs enums in
     // MultiDocsEnum.
     mergeState.multiDeletedDocs = new MultiBits(bits, bitsStarts);
-    
+
     try {
       consumer.merge(mergeState,
                      new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
@@ -556,6 +571,21 @@ final class SegmentMerger {
     } finally {
       consumer.close();
     }
+    if (!perDocSlices.isEmpty()) {
+      mergeState.multiDeletedDocs = new MultiBits(perDocBits, perDocBitsStarts);
+      final PerDocConsumer docsConsumer =
+          codec.docsConsumer(new PerDocWriteState(segmentWriteState));
+      try {
+        docsConsumer.merge(
+            mergeState,
+            new MultiPerDocValues(
+                perDocProducers.toArray(PerDocValues.EMPTY_ARRAY),
+                perDocSlices.toArray(ReaderUtil.Slice.EMPTY_ARRAY)));
+      } finally {
+        docsConsumer.close();
+      }
+    }
+    
   }
 
   private MergeState mergeState;
@@ -567,7 +597,7 @@ final class SegmentMerger {
   int[] getDelCounts() {
     return mergeState.delCounts;
   }
-  
+
   public boolean getAnyNonBulkMerges() {
     assert matchedCount <= readers.size();
     return matchedCount != readers.size();
@@ -578,7 +608,7 @@ final class SegmentMerger {
     try {
       for (FieldInfo fi : fieldInfos) {
         if (fi.isIndexed && !fi.omitNorms) {
-          if (output == null) { 
+          if (output == null) {
             output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION));
             output.writeBytes(NORMS_HEADER,NORMS_HEADER.length);
           }
@@ -609,7 +639,7 @@ final class SegmentMerger {
         }
       }
     } finally {
-      if (output != null) { 
+      if (output != null) {
         output.close();
       }
     }