You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2011/03/25 17:24:13 UTC

svn commit: r1085449 - in /lucene/dev/trunk/lucene/src: java/org/apache/lucene/index/ test-framework/org/apache/lucene/util/ test/org/apache/lucene/index/

Author: simonw
Date: Fri Mar 25 16:24:12 2011
New Revision: 1085449

URL: http://svn.apache.org/viewvc?rev=1085449&view=rev
Log:
LUCENE-2985: Build SegmentCodecs incrementally for consistent codecIDs during indexing

Modified:
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
    lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestCodecs.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldInfos.java

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java Fri Mar 25 16:24:12 2011
@@ -162,7 +162,7 @@ final class DocFieldProcessorPerThread e
         // needs to be more "pluggable" such that if I want
         // to have a new "thing" my Fields can do, I can
         // easily add it
-        FieldInfo fi = fieldInfos.add(fieldName, field.isIndexed(), field.isTermVectorStored(),
+        FieldInfo fi = fieldInfos.addOrUpdate(fieldName, field.isIndexed(), field.isTermVectorStored(),
                                       field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
                                       field.getOmitNorms(), false, field.getOmitTermFreqAndPositions());
         fp = new DocFieldProcessorPerField(this, fi);
@@ -172,11 +172,11 @@ final class DocFieldProcessorPerThread e
 
         if (totalFieldCount >= fieldHash.length/2)
           rehash();
-      } else
-        fp.fieldInfo.update(field.isIndexed(), field.isTermVectorStored(),
+      } else {
+        fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(),
                             field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
                             field.getOmitNorms(), false, field.getOmitTermFreqAndPositions());
-
+      }
       if (thisFieldGen != fp.lastGen) {
 
         // First time we're seeing this field for this doc

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Fri Mar 25 16:24:12 2011
@@ -283,7 +283,6 @@ final class DocumentsWriter {
     this.fieldInfos = fieldInfos;
     this.bufferedDeletesStream = bufferedDeletesStream;
     flushControl = writer.flushControl;
-
     consumer = config.getIndexingChain().getChain(this);
     this.config = config;
   }
@@ -539,10 +538,10 @@ final class DocumentsWriter {
       if (infoStream != null) {
         message("flush postings as segment " + segment + " numDocs=" + numDocs);
       }
-
+      
       final SegmentWriteState flushState = new SegmentWriteState(infoStream, directory, segment, fieldInfos,
                                                                  numDocs, writer.getConfig().getTermIndexInterval(),
-                                                                 SegmentCodecs.build(fieldInfos, writer.codecs),
+                                                                 fieldInfos.buildSegmentCodecs(true),
                                                                  pendingDeletes);
       // Apply delete-by-docID now (delete-byDocID only
       // happens when an exception is hit processing that

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfo.java Fri Mar 25 16:24:12 2011
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
 
 /** @lucene.experimental */
 public final class FieldInfo {
+  public static final int UNASSIGNED_CODEC_ID = -1;
   public String name;
   public boolean isIndexed;
   public int number;
@@ -32,7 +33,7 @@ public final class FieldInfo {
   public boolean omitTermFreqAndPositions;
 
   public boolean storePayloads; // whether this field stores payloads together with term positions
-  private int codecId = -1; // set inside SegmentCodecs#build() during segment flush - this is used to identify the codec used to write this field
+  private int codecId = UNASSIGNED_CODEC_ID; // set inside SegmentCodecs#build() during segment flush - this is used to identify the codec used to write this field
 
   FieldInfo(String na, boolean tk, int nu, boolean storeTermVector, 
             boolean storePositionWithTermVector,  boolean storeOffsetWithTermVector, 
@@ -57,8 +58,8 @@ public final class FieldInfo {
     }
   }
 
-  public void setCodecId(int codecId) {
-    assert this.codecId == -1 : "CodecId can only be set once.";
+  void setCodecId(int codecId) {
+    assert this.codecId == UNASSIGNED_CODEC_ID : "CodecId can only be set once.";
     this.codecId = codecId;
   }
 
@@ -74,6 +75,7 @@ public final class FieldInfo {
     return clone;
   }
 
+  // should only be called by FieldInfos#addOrUpdate
   void update(boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector, 
               boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
     if (this.isIndexed != isIndexed) {

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldInfos.java Fri Mar 25 16:24:12 2011
@@ -28,6 +28,8 @@ import java.util.SortedMap;
 import java.util.TreeMap;
 import java.util.Map.Entry;
 
+import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder;
+import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
@@ -147,8 +149,8 @@ public final class FieldInfos implements
      * @return a new {@link FieldInfos} instance with this as the global field
      *         map
      */
-    public FieldInfos newFieldInfos() {
-      return new FieldInfos(this);
+    public FieldInfos newFieldInfos(SegmentCodecsBuilder segmentCodecsBuilder) {
+      return new FieldInfos(this, segmentCodecsBuilder);
     }
 
     /**
@@ -193,6 +195,7 @@ public final class FieldInfos implements
   private final SortedMap<Integer,FieldInfo> byNumber = new TreeMap<Integer,FieldInfo>();
   private final HashMap<String,FieldInfo> byName = new HashMap<String,FieldInfo>();
   private final FieldNumberBiMap globalFieldNumbers;
+  private final SegmentCodecsBuilder segmentCodecsBuilder;
   
   // First used in 2.9; prior to 2.9 there was no format header
   public static final int FORMAT_START = -2;
@@ -215,14 +218,15 @@ public final class FieldInfos implements
 
   /**
    * Creates a new {@link FieldInfos} instance with a private
-   * {@link FieldNumberBiMap}.
+   * {@link FieldNumberBiMap} and a default {@link SegmentCodecsBuilder}
+   * initialized with {@link CodecProvider#getDefault()}.
    * <p>
    * Note: this ctor should not be used during indexing use
    * {@link FieldInfos#FieldInfos(FieldInfos)} or
    * {@link FieldInfos#FieldInfos(FieldNumberBiMap)} instead.
    */
   public FieldInfos() {
-    this(new FieldNumberBiMap());
+    this(new FieldNumberBiMap(), SegmentCodecsBuilder.create(CodecProvider.getDefault()));
   }
   
   /**
@@ -232,7 +236,7 @@ public final class FieldInfos implements
    * @see #isReadOnly()
    */
   FieldInfos(FieldInfos other) {
-    this(other.globalFieldNumbers);
+    this(other.globalFieldNumbers, other.segmentCodecsBuilder);
   }
   
   /**
@@ -240,8 +244,9 @@ public final class FieldInfos implements
    * If the {@link FieldNumberBiMap} is <code>null</code> this instance will be read-only.
    * @see #isReadOnly()
    */
-  FieldInfos(FieldNumberBiMap globalFieldNumbers) {
+  FieldInfos(FieldNumberBiMap globalFieldNumbers, SegmentCodecsBuilder segmentCodecsBuilder) {
     this.globalFieldNumbers = globalFieldNumbers;
+    this.segmentCodecsBuilder = segmentCodecsBuilder;
   }
 
   /**
@@ -255,7 +260,7 @@ public final class FieldInfos implements
    * @throws IOException
    */
   public FieldInfos(Directory d, String name) throws IOException {
-    this((FieldNumberBiMap)null); // use null here to make this FIs Read-Only
+    this((FieldNumberBiMap)null, null); // use null here to make this FIs Read-Only
     IndexInput input = d.openInput(name);
     try {
       read(input, name);
@@ -291,7 +296,7 @@ public final class FieldInfos implements
    */
   @Override
   synchronized public Object clone() {
-    FieldInfos fis = new FieldInfos(globalFieldNumbers);
+    FieldInfos fis = new FieldInfos(globalFieldNumbers, segmentCodecsBuilder);
     for (FieldInfo fi : this) {
       FieldInfo clone = (FieldInfo) (fi).clone();
       fis.putInternal(clone);
@@ -310,17 +315,17 @@ public final class FieldInfos implements
   }
   
   /**
-   * Add fields that are indexed. Whether they have termvectors has to be specified.
+   * Adds or updates fields that are indexed. Whether they have termvectors has to be specified.
    * 
    * @param names The names of the fields
    * @param storeTermVectors Whether the fields store term vectors or not
    * @param storePositionWithTermVector true if positions should be stored.
    * @param storeOffsetWithTermVector true if offsets should be stored
    */
-  synchronized public void addIndexed(Collection<String> names, boolean storeTermVectors, boolean storePositionWithTermVector, 
+  synchronized public void addOrUpdateIndexed(Collection<String> names, boolean storeTermVectors, boolean storePositionWithTermVector, 
                          boolean storeOffsetWithTermVector) {
     for (String name : names) {
-      add(name, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
+      addOrUpdate(name, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector);
     }
   }
 
@@ -330,11 +335,11 @@ public final class FieldInfos implements
    * @param names The names of the fields
    * @param isIndexed Whether the fields are indexed or not
    * 
-   * @see #add(String, boolean)
+   * @see #addOrUpdate(String, boolean)
    */
-  synchronized public void add(Collection<String> names, boolean isIndexed) {
+  synchronized public void addOrUpdate(Collection<String> names, boolean isIndexed) {
     for (String name : names) {
-      add(name, isIndexed);
+      addOrUpdate(name, isIndexed);
     }
   }
 
@@ -343,10 +348,10 @@ public final class FieldInfos implements
    * 
    * @param name The name of the Fieldable
    * @param isIndexed true if the field is indexed
-   * @see #add(String, boolean, boolean, boolean, boolean)
+   * @see #addOrUpdate(String, boolean, boolean, boolean, boolean)
    */
-  synchronized public void add(String name, boolean isIndexed) {
-    add(name, isIndexed, false, false, false, false);
+  synchronized public void addOrUpdate(String name, boolean isIndexed) {
+    addOrUpdate(name, isIndexed, false, false, false, false);
   }
 
   /**
@@ -356,8 +361,8 @@ public final class FieldInfos implements
    * @param isIndexed  true if the field is indexed
    * @param storeTermVector true if the term vector should be stored
    */
-  synchronized public void add(String name, boolean isIndexed, boolean storeTermVector){
-    add(name, isIndexed, storeTermVector, false, false, false);
+  synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector){
+    addOrUpdate(name, isIndexed, storeTermVector, false, false, false);
   }
   
   /** If the field is not yet known, adds it. If it is known, checks to make
@@ -371,10 +376,10 @@ public final class FieldInfos implements
    * @param storePositionWithTermVector true if the term vector with positions should be stored
    * @param storeOffsetWithTermVector true if the term vector with offsets should be stored
    */
-  synchronized public void add(String name, boolean isIndexed, boolean storeTermVector,
+  synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
                   boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
 
-    add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false);
+    addOrUpdate(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false);
   }
 
     /** If the field is not yet known, adds it. If it is known, checks to make
@@ -389,9 +394,9 @@ public final class FieldInfos implements
    * @param storeOffsetWithTermVector true if the term vector with offsets should be stored
    * @param omitNorms true if the norms for the indexed field should be omitted
    */
-  synchronized public void add(String name, boolean isIndexed, boolean storeTermVector,
+  synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
                   boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
-    add(name, isIndexed, storeTermVector, storePositionWithTermVector,
+    addOrUpdate(name, isIndexed, storeTermVector, storePositionWithTermVector,
         storeOffsetWithTermVector, omitNorms, false, false);
   }
   
@@ -409,7 +414,7 @@ public final class FieldInfos implements
    * @param storePayloads true if payloads should be stored for this field
    * @param omitTermFreqAndPositions true if term freqs should be omitted for this field
    */
-  synchronized public FieldInfo add(String name, boolean isIndexed, boolean storeTermVector,
+  synchronized public FieldInfo addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
                        boolean storePositionWithTermVector, boolean storeOffsetWithTermVector,
                        boolean omitNorms, boolean storePayloads, boolean omitTermFreqAndPositions) {
     return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, storePositionWithTermVector,
@@ -422,13 +427,17 @@ public final class FieldInfos implements
     if (globalFieldNumbers == null) {
       throw new IllegalStateException("FieldInfos are read-only, create a new instance with a global field map to make modifications to FieldInfos");
     }
-    final FieldInfo fi = fieldInfo(name);
+    assert segmentCodecsBuilder != null : "SegmentCodecsBuilder is set to null but FieldInfos is not read-only";
+    FieldInfo fi = fieldInfo(name);
     if (fi == null) {
       final int fieldNumber = nextFieldNumber(name, preferredFieldNumber);
-      return addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
+      fi = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
     } else {
       fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
     }
+    if (fi.isIndexed && fi.getCodecId() == FieldInfo.UNASSIGNED_CODEC_ID) {
+      segmentCodecsBuilder.tryAddAndSet(fi);
+    }
     return fi;
   }
 
@@ -515,6 +524,22 @@ public final class FieldInfos implements
     }
     return false;
   }
+  
+  /**
+   * Builds the {@link SegmentCodecs} mapping for this {@link FieldInfos} instance.
+   * @param clearBuilder <code>true</code> iff the internal {@link SegmentCodecsBuilder} must be cleared otherwise <code>false</code>
+   */
+  public SegmentCodecs buildSegmentCodecs(boolean clearBuilder) {
+    if (globalFieldNumbers == null) {
+      throw new IllegalStateException("FieldInfos are read-only no SegmentCodecs available");
+    }
+    assert segmentCodecsBuilder != null;
+    final SegmentCodecs segmentCodecs = segmentCodecsBuilder.build();
+    if (clearBuilder) {
+      segmentCodecsBuilder.clear();
+    }
+    return segmentCodecs;
+  }
 
   public void write(Directory d, String name) throws IOException {
     IndexOutput output = d.createOutput(name);

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexWriter.java Fri Mar 25 16:24:12 2011
@@ -38,6 +38,7 @@ import org.apache.lucene.document.Docume
 import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
+import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.store.AlreadyClosedException;
@@ -790,7 +791,7 @@ public class IndexWriter implements Clos
       // start with previous field numbers, but new FieldInfos
       globalFieldNumberMap = segmentInfos.getOrLoadGlobalFieldNumberMap(directory);
       docWriter = new DocumentsWriter(config, directory, this, conf.getIndexingChain(),
-          globalFieldNumberMap.newFieldInfos(), bufferedDeletesStream);
+          globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)), bufferedDeletesStream);
       docWriter.setInfoStream(infoStream);
 
       // Default deleter (for backwards compatibility) is
@@ -2208,7 +2209,7 @@ public class IndexWriter implements Clos
       String mergedName = newSegmentName();
       SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(),
                                                mergedName, null, codecs, payloadProcessorProvider,
-                                               globalFieldNumberMap.newFieldInfos());
+                                               globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)));
       
       for (IndexReader reader : readers)      // add new indexes
         merger.add(reader);
@@ -2951,7 +2952,7 @@ public class IndexWriter implements Clos
     // Bind a new segment name here so even with
     // ConcurrentMergePolicy we keep deterministic segment
     // names.
-    merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, false, null, false, globalFieldNumberMap.newFieldInfos());
+    merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, false, null, false, globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)));
 
     // Lock order: IW -> BD
     final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments);

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java Fri Mar 25 16:24:12 2011
@@ -67,6 +67,7 @@ final class PerFieldCodecWrapper extends
 
     @Override
     public TermsConsumer addField(FieldInfo field) throws IOException {
+      assert field.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID;
       final FieldsConsumer fields = consumers.get(field.getCodecId());
       return fields.addField(field);
     }
@@ -106,6 +107,7 @@ final class PerFieldCodecWrapper extends
         for (FieldInfo fi : fieldInfos) {
           if (fi.isIndexed) { // TODO this does not work for non-indexed fields
             fields.add(fi.name);
+            assert fi.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID;
             Codec codec = segmentCodecs.codecs[fi.getCodecId()];
             if (!producers.containsKey(codec)) {
               producers.put(codec, codec.fieldsProducer(new SegmentReadState(dir,

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentCodecs.java Fri Mar 25 16:24:12 2011
@@ -38,17 +38,16 @@ import org.apache.lucene.store.IndexOutp
  * {@link SegmentWriteState} for each flush and is maintained in the
  * corresponding {@link SegmentInfo} until it is committed.
  * <p>
- * {@link SegmentCodecs#build(FieldInfos, CodecProvider)} should be used to
- * create a {@link SegmentCodecs} instance during {@link IndexWriter} sessions
- * which creates the ordering of distinct codecs and assigns the
- * {@link FieldInfo#codecId} or in other words, the ord of the codec maintained
- * inside {@link SegmentCodecs}, to the {@link FieldInfo}. This ord is valid
- * only until the current segment is flushed and {@link FieldInfos} for that
- * segment are written including the ord for each field. This ord is later used
- * to get the right codec when the segment is opened in a reader. The
- * {@link Codec} returned from {@link SegmentCodecs#codec()} in turn uses
- * {@link SegmentCodecs} internal structure to select and initialize the right
- * codec for a fields when it is written.
+ * During indexing {@link FieldInfos} uses {@link SegmentCodecsBuilder} to incrementally
+ * build the {@link SegmentCodecs} mapping. Once a segment is flushed
+ * DocumentsWriter creates a {@link SegmentCodecs} instance from
+ * {@link FieldInfos#buildSegmentCodecs(boolean)} The {@link FieldInfo#codecId}
+ * assigned by {@link SegmentCodecsBuilder} refers to the codecs ordinal
+ * maintained inside {@link SegmentCodecs}. This ord is later used to get the
+ * right codec when the segment is opened in a reader.The {@link Codec} returned
+ * from {@link SegmentCodecs#codec()} in turn uses {@link SegmentCodecs}
+ * internal structure to select and initialize the right codec for a fields when
+ * it is written.
  * <p>
  * Once a flush succeeded the {@link SegmentCodecs} is maintained inside the
  * {@link SegmentInfo} for the flushed segment it was created for.
@@ -64,36 +63,19 @@ final class SegmentCodecs implements Clo
    * internal structure to map codecs to fields - don't modify this from outside
    * of this class!
    */
-  Codec[] codecs;
+  final Codec[] codecs;
   final CodecProvider provider;
   private final Codec codec = new PerFieldCodecWrapper(this);
-
+  
+  SegmentCodecs(CodecProvider provider, IndexInput input) throws IOException {
+    this(provider, read(input, provider));
+  }
+  
   SegmentCodecs(CodecProvider provider, Codec... codecs) {
     this.provider = provider;
     this.codecs = codecs;
   }
 
-  static SegmentCodecs build(FieldInfos infos, CodecProvider provider) {
-    final Map<Codec, Integer> codecRegistry = new IdentityHashMap<Codec, Integer>();
-    final ArrayList<Codec> codecs = new ArrayList<Codec>();
-
-    for (FieldInfo fi : infos) {
-      if (fi.isIndexed) {
-        final Codec fieldCodec = provider.lookup(provider
-            .getFieldCodec(fi.name));
-        Integer ord = codecRegistry.get(fieldCodec);
-        if (ord == null) {
-          ord = Integer.valueOf(codecs.size());
-          codecRegistry.put(fieldCodec, ord);
-          codecs.add(fieldCodec);
-        }
-        fi.setCodecId(ord.intValue());
-      }
-    }
-    return new SegmentCodecs(provider, codecs.toArray(Codec.EMPTY));
-
-  }
-
   Codec codec() {
     return codec;
   }
@@ -105,7 +87,7 @@ final class SegmentCodecs implements Clo
     }
   }
 
-  void read(IndexInput in) throws IOException {
+  private static Codec[] read(IndexInput in, CodecProvider provider) throws IOException {
     final int size = in.readVInt();
     final ArrayList<Codec> list = new ArrayList<Codec>();
     for (int i = 0; i < size; i++) {
@@ -113,7 +95,7 @@ final class SegmentCodecs implements Clo
       final Codec lookup = provider.lookup(codecName);
       list.add(i, lookup);
     }
-    codecs = list.toArray(Codec.EMPTY);
+    return list.toArray(Codec.EMPTY);
   }
 
   void files(Directory dir, SegmentInfo info, Set<String> files)
@@ -129,4 +111,58 @@ final class SegmentCodecs implements Clo
   public String toString() {
     return "SegmentCodecs [codecs=" + Arrays.toString(codecs) + ", provider=" + provider + "]";
   }
+  
+  /**
+   * Used in {@link FieldInfos} to incrementally build the codec ID mapping for
+   * {@link FieldInfo} instances.
+   * <p>
+   * Note: this class is not thread-safe
+   * </p>
+   * @see FieldInfo#getCodecId()
+   */
+  final static class SegmentCodecsBuilder {
+    private final Map<Codec, Integer> codecRegistry = new IdentityHashMap<Codec, Integer>();
+    private final ArrayList<Codec> codecs = new ArrayList<Codec>();
+    private final CodecProvider provider;
+
+    private SegmentCodecsBuilder(CodecProvider provider) {
+      this.provider = provider;
+    }
+    
+    static SegmentCodecsBuilder create(CodecProvider provider) {
+      return new SegmentCodecsBuilder(provider);
+    }
+    
+    SegmentCodecsBuilder tryAddAndSet(FieldInfo fi) {
+      if (fi.getCodecId() == FieldInfo.UNASSIGNED_CODEC_ID) {
+        final Codec fieldCodec = provider.lookup(provider
+            .getFieldCodec(fi.name));
+        Integer ord = codecRegistry.get(fieldCodec);
+        if (ord == null) {
+          ord = Integer.valueOf(codecs.size());
+          codecRegistry.put(fieldCodec, ord);
+          codecs.add(fieldCodec);
+        }
+        fi.setCodecId(ord.intValue());
+      }
+      return this;
+    }
+    
+    SegmentCodecsBuilder addAll(FieldInfos infos) {
+      for (FieldInfo fieldInfo : infos) {
+        tryAddAndSet(fieldInfo);
+      }
+      return this;
+    }
+    
+    SegmentCodecs build() {
+      return new SegmentCodecs(provider, codecs.toArray(Codec.EMPTY));
+    }
+    
+    SegmentCodecsBuilder clear() {
+      codecRegistry.clear();
+      codecs.clear();
+      return this;
+    }
+  }
 }
\ No newline at end of file

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentInfo.java Fri Mar 25 16:24:12 2011
@@ -209,13 +209,12 @@ public final class SegmentInfo {
     hasProx = input.readByte() == YES;
     
     // System.out.println(Thread.currentThread().getName() + ": si.read hasProx=" + hasProx + " seg=" + name);
-    segmentCodecs = new SegmentCodecs(codecs);
     if (format <= DefaultSegmentInfosWriter.FORMAT_4_0) {
-      segmentCodecs.read(input);
+      segmentCodecs = new SegmentCodecs(codecs, input);
     } else {
       // codec ID on FieldInfo is 0 so it will simply use the first codec available
       // TODO what todo if preflex is not available in the provider? register it or fail?
-      segmentCodecs.codecs = new Codec[] { codecs.lookup("PreFlex")};
+      segmentCodecs = new SegmentCodecs(codecs, new Codec[] { codecs.lookup("PreFlex")});
     }
     diagnostics = input.readStringStringMap();
     

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java Fri Mar 25 16:24:12 2011
@@ -148,7 +148,7 @@ final class SegmentMerger {
       boolean storePayloads, boolean omitTFAndPositions)
       throws IOException {
     for (String field : names) {
-      fInfos.add(field, true, storeTermVectors,
+      fInfos.addOrUpdate(field, true, storeTermVectors,
           storePositionWithTermVector, storeOffsetWithTermVector, !reader
               .hasNorms(field), storePayloads, omitTFAndPositions);
     }
@@ -218,10 +218,10 @@ final class SegmentMerger {
         addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true);
         addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, false);
         addIndexed(reader, fieldInfos, reader.getFieldNames(FieldOption.INDEXED), false, false, false, false, false);
-        fieldInfos.add(reader.getFieldNames(FieldOption.UNINDEXED), false);
+        fieldInfos.addOrUpdate(reader.getFieldNames(FieldOption.UNINDEXED), false);
       }
     }
-    final SegmentCodecs codecInfo = SegmentCodecs.build(fieldInfos, this.codecs);
+    final SegmentCodecs codecInfo = fieldInfos.buildSegmentCodecs(false);
     fieldInfos.write(directory, segment + "." + IndexFileNames.FIELD_INFOS_EXTENSION);
 
     int docCount = 0;

Modified: lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java (original)
+++ lucene/dev/trunk/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java Fri Mar 25 16:24:12 2011
@@ -334,7 +334,7 @@ public class _TestUtil {
   public static void add(Document doc, FieldInfos fieldInfos) {
     List<Fieldable> fields = doc.getFields();
     for (Fieldable field : fields) {
-      fieldInfos.add(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
+      fieldInfos.addOrUpdate(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
               field.isStoreOffsetWithTermVector(), field.getOmitNorms(), false, field.getOmitTermFreqAndPositions());
     }
   }

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestCodecs.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestCodecs.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestCodecs.java Fri Mar 25 16:24:12 2011
@@ -80,7 +80,7 @@ public class TestCodecs extends LuceneTe
     public FieldData(final String name, final FieldInfos fieldInfos, final TermData[] terms, final boolean omitTF, final boolean storePayloads) {
       this.omitTF = omitTF;
       this.storePayloads = storePayloads;
-      fieldInfos.add(name, true);
+      fieldInfos.addOrUpdate(name, true);
       fieldInfo = fieldInfos.fieldInfo(name);
       fieldInfo.omitTermFreqAndPositions = omitTF;
       fieldInfo.storePayloads = storePayloads;
@@ -240,7 +240,7 @@ public class TestCodecs extends LuceneTe
     final Directory dir = newDirectory();
     FieldInfos clonedFieldInfos = (FieldInfos) fieldInfos.clone();
     this.write(fieldInfos, dir, fields, true);
-    final SegmentInfo si = new SegmentInfo(SEGMENT, 10000, dir, false, true, SegmentCodecs.build(clonedFieldInfos, CodecProvider.getDefault()), clonedFieldInfos.hasVectors(), clonedFieldInfos);
+    final SegmentInfo si = new SegmentInfo(SEGMENT, 10000, dir, false, true, clonedFieldInfos.buildSegmentCodecs(false), clonedFieldInfos.hasVectors(), clonedFieldInfos);
     si.setHasProx(false);
 
     final FieldsProducer reader = si.getSegmentCodecs().codec().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, 64, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR));
@@ -293,7 +293,7 @@ public class TestCodecs extends LuceneTe
 
     FieldInfos clonedFieldInfos = (FieldInfos) fieldInfos.clone();
     this.write(fieldInfos, dir, fields, false);
-    final SegmentInfo si = new SegmentInfo(SEGMENT, 10000, dir, false, true, SegmentCodecs.build(clonedFieldInfos, CodecProvider.getDefault()), clonedFieldInfos.hasVectors(), clonedFieldInfos);
+    final SegmentInfo si = new SegmentInfo(SEGMENT, 10000, dir, false, true,  clonedFieldInfos.buildSegmentCodecs(false), clonedFieldInfos.hasVectors(), clonedFieldInfos);
 
     if (VERBOSE) {
       System.out.println("TEST: now read postings");
@@ -442,7 +442,7 @@ public class TestCodecs extends LuceneTe
       for(int iter=0;iter<NUM_TEST_ITER;iter++) {
         final FieldData field = fields[TestCodecs.random.nextInt(fields.length)];
         final TermsEnum termsEnum = termsDict.terms(field.fieldInfo.name).iterator();
-
+        assertTrue(field.fieldInfo.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID);
         if (si.getSegmentCodecs().codecs[field.fieldInfo.getCodecId()] instanceof PreFlexCodec) {
           // code below expects unicode sort order
           continue;
@@ -591,12 +591,13 @@ public class TestCodecs extends LuceneTe
   private void write(final FieldInfos fieldInfos, final Directory dir, final FieldData[] fields, boolean allowPreFlex) throws Throwable {
 
     final int termIndexInterval = _TestUtil.nextInt(random, 13, 27);
-    final SegmentCodecs codecInfo = SegmentCodecs.build(fieldInfos, CodecProvider.getDefault());
+    final SegmentCodecs codecInfo =  fieldInfos.buildSegmentCodecs(false);
     final SegmentWriteState state = new SegmentWriteState(null, dir, SEGMENT, fieldInfos, 10000, termIndexInterval, codecInfo, null);
 
     final FieldsConsumer consumer = state.segmentCodecs.codec().fieldsConsumer(state);
     Arrays.sort(fields);
     for (final FieldData field : fields) {
+      assertTrue(field.fieldInfo.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID);
       if (!allowPreFlex && codecInfo.codecs[field.fieldInfo.getCodecId()] instanceof PreFlexCodec) {
         // code below expects unicode sort order
         continue;

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldInfos.java?rev=1085449&r1=1085448&r2=1085449&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldInfos.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldInfos.java Fri Mar 25 16:24:12 2011
@@ -109,33 +109,33 @@ public class TestFieldInfos extends Luce
     }
     
     try {
-      readOnly.add("bogus", random.nextBoolean());
+      readOnly.addOrUpdate("bogus", random.nextBoolean());
       fail("instance should be read only");
     } catch (IllegalStateException e) {
       // expected
     }
     try {
-      readOnly.add("bogus", random.nextBoolean(), random.nextBoolean());
+      readOnly.addOrUpdate("bogus", random.nextBoolean(), random.nextBoolean());
       fail("instance should be read only");
     } catch (IllegalStateException e) {
       // expected
     }
     try {
-      readOnly.add("bogus", random.nextBoolean(), random.nextBoolean(),
+      readOnly.addOrUpdate("bogus", random.nextBoolean(), random.nextBoolean(),
           random.nextBoolean(), random.nextBoolean());
       fail("instance should be read only");
     } catch (IllegalStateException e) {
       // expected
     }
     try {
-      readOnly.add("bogus", random.nextBoolean(), random.nextBoolean(),
+      readOnly.addOrUpdate("bogus", random.nextBoolean(), random.nextBoolean(),
           random.nextBoolean(), random.nextBoolean(), random.nextBoolean());
       fail("instance should be read only");
     } catch (IllegalStateException e) {
       // expected
     }
     try {
-      readOnly.add("bogus", random.nextBoolean(), random.nextBoolean(),
+      readOnly.addOrUpdate("bogus", random.nextBoolean(), random.nextBoolean(),
           random.nextBoolean(), random.nextBoolean(), random.nextBoolean(),
           random.nextBoolean(), random.nextBoolean());
       fail("instance should be read only");
@@ -143,7 +143,7 @@ public class TestFieldInfos extends Luce
       // expected
     }
     try {
-      readOnly.add(Arrays.asList("a", "b", "c"), random.nextBoolean());
+      readOnly.addOrUpdate(Arrays.asList("a", "b", "c"), random.nextBoolean());
       fail("instance should be read only");
     } catch (IllegalStateException e) {
       // expected