Posted to commits@lucene.apache.org by mi...@apache.org on 2013/09/19 22:57:11 UTC

svn commit: r1524840 [2/4] - in /lucene/dev/trunk/lucene: ./ codecs/src/java/org/apache/lucene/codecs/blockterms/ codecs/src/java/org/apache/lucene/codecs/bloom/ codecs/src/java/org/apache/lucene/codecs/memory/ codecs/src/java/org/apache/lucene/codecs/...

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java Thu Sep 19 20:57:09 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.codecs.lucene4
  */
 
 import java.io.IOException;
-import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -511,11 +510,6 @@ class Lucene42DocValuesProducer extends 
     }
 
     @Override
-    public Comparator<BytesRef> getComparator() {
-      return BytesRef.getUTF8SortedAsUnicodeComparator();
-    }
-
-    @Override
     public SeekStatus seekCeil(BytesRef text) throws IOException {
       if (in.seekCeil(text) == null) {
         return SeekStatus.END;
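
Context for this and the similar hunks below: TermsEnum.getComparator is
removed because term ordering is now fixed at UTF-8 byte order, so call
sites compare BytesRef values directly. A minimal sketch of the change in
comparison style, using two illustrative terms:

    BytesRef a = new BytesRef("apple");
    BytesRef b = new BytesRef("banana");

    // Before: ordering came from a pluggable comparator.
    // int cmp = BytesRef.getUTF8SortedAsUnicodeComparator().compare(a, b);

    // After: BytesRef is Comparable in unsigned byte (UTF-8) order.
    int cmp = a.compareTo(b); // negative here: "apple" sorts before "banana"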

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene45/Lucene45DocValuesProducer.java Thu Sep 19 20:57:09 2013
@@ -26,7 +26,6 @@ import static org.apache.lucene.codecs.l
 
 import java.io.Closeable; // javadocs
 import java.io.IOException;
-import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -811,11 +810,6 @@ public class Lucene45DocValuesProducer e
         public long ord() throws IOException {
           return currentOrd;
         }
-        
-        @Override
-        public Comparator<BytesRef> getComparator() {
-          return BytesRef.getUTF8SortedAsUnicodeComparator();
-        }
 
         @Override
         public int docFreq() throws IOException {

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java Thu Sep 19 20:57:09 2013
@@ -17,26 +17,29 @@ package org.apache.lucene.codecs.perfiel
  * limitations under the License.
  */
 
-import java.io.Closeable;
 import java.io.IOException;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.ServiceLoader; // javadocs
+import java.util.Set;
 import java.util.TreeMap;
+import java.util.TreeSet;
 
 import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.TermsConsumer;
 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.RamUsageEstimator;
 
+import static org.apache.lucene.index.FilterAtomicReader.FilterFields;
+
 /**
  * Enables per field postings support.
  * <p>
@@ -65,96 +68,22 @@ public abstract class PerFieldPostingsFo
    *  segment suffix name for each field. */
   public static final String PER_FIELD_SUFFIX_KEY = PerFieldPostingsFormat.class.getSimpleName() + ".suffix";
 
-  
   /** Sole constructor. */
   public PerFieldPostingsFormat() {
     super(PER_FIELD_NAME);
   }
 
-  @Override
-  public final FieldsConsumer fieldsConsumer(SegmentWriteState state)
-      throws IOException {
-    return new FieldsWriter(state);
-  }
-  
-  static class FieldsConsumerAndSuffix implements Closeable {
-    FieldsConsumer consumer;
+  /** Group of fields written by one PostingsFormat */
+  static class FieldsGroup {
+    final Set<String> fields = new TreeSet<String>();
     int suffix;
-    
-    @Override
-    public void close() throws IOException {
-      consumer.close();
-    }
-  }
-    
-  private class FieldsWriter extends FieldsConsumer {
-
-    private final Map<PostingsFormat,FieldsConsumerAndSuffix> formats = new HashMap<PostingsFormat,FieldsConsumerAndSuffix>();
-    private final Map<String,Integer> suffixes = new HashMap<String,Integer>();
-    
-    private final SegmentWriteState segmentWriteState;
 
-    public FieldsWriter(SegmentWriteState state) {
-      segmentWriteState = state;
-    }
+    /** Custom SegmentWriteState for this group of fields,
+     *  with the segmentSuffix uniqueified for this
+     *  PostingsFormat */
+    SegmentWriteState state;
+  }
 
-    @Override
-    public TermsConsumer addField(FieldInfo field) throws IOException {
-      final PostingsFormat format = getPostingsFormatForField(field.name);
-      if (format == null) {
-        throw new IllegalStateException("invalid null PostingsFormat for field=\"" + field.name + "\"");
-      }
-      final String formatName = format.getName();
-      
-      String previousValue = field.putAttribute(PER_FIELD_FORMAT_KEY, formatName);
-      assert previousValue == null;
-      
-      Integer suffix;
-      
-      FieldsConsumerAndSuffix consumer = formats.get(format);
-      if (consumer == null) {
-        // First time we are seeing this format; create a new instance
-        
-        // bump the suffix
-        suffix = suffixes.get(formatName);
-        if (suffix == null) {
-          suffix = 0;
-        } else {
-          suffix = suffix + 1;
-        }
-        suffixes.put(formatName, suffix);
-        
-        final String segmentSuffix = getFullSegmentSuffix(field.name,
-                                                          segmentWriteState.segmentSuffix,
-                                                          getSuffix(formatName, Integer.toString(suffix)));
-        consumer = new FieldsConsumerAndSuffix();
-        consumer.consumer = format.fieldsConsumer(new SegmentWriteState(segmentWriteState, segmentSuffix));
-        consumer.suffix = suffix;
-        formats.put(format, consumer);
-      } else {
-        // we've already seen this format, so just grab its suffix
-        assert suffixes.containsKey(formatName);
-        suffix = consumer.suffix;
-      }
-      
-      previousValue = field.putAttribute(PER_FIELD_SUFFIX_KEY, Integer.toString(suffix));
-      assert previousValue == null;
-
-      // TODO: we should only provide the "slice" of FIS
-      // that this PF actually sees ... then stuff like
-      // .hasProx could work correctly?
-      // NOTE: .hasProx is already broken in the same way for the non-perfield case,
-      // if there is a fieldinfo with prox that has no postings, you get a 0 byte file.
-      return consumer.consumer.addField(field);
-    }
-
-    @Override
-    public void close() throws IOException {
-      // Close all subs
-      IOUtils.close(formats.values());
-    }
-  }
-  
   static String getSuffix(String formatName, String suffix) {
     return formatName + "_" + suffix;
   }
@@ -169,6 +98,87 @@ public abstract class PerFieldPostingsFo
       throw new IllegalStateException("cannot embed PerFieldPostingsFormat inside itself (field \"" + fieldName + "\" returned PerFieldPostingsFormat)");
     }
   }
+  
+  private class FieldsWriter extends FieldsConsumer {
+    final SegmentWriteState writeState;
+
+    public FieldsWriter(SegmentWriteState writeState) {
+      this.writeState = writeState;
+    }
+
+    @Override
+    public void write(Fields fields) throws IOException {
+
+      // Maps each PostingsFormat instance to the group of
+      // fields it will write:
+      Map<PostingsFormat,FieldsGroup> formatToGroups = new HashMap<PostingsFormat,FieldsGroup>();
+
+      // Holds the last suffix used for each PostingsFormat name:
+      Map<String,Integer> suffixes = new HashMap<String,Integer>();
+
+      // First pass: assign field -> PostingsFormat
+      for(String field : fields) {
+        FieldInfo fieldInfo = writeState.fieldInfos.fieldInfo(field);
+
+        final PostingsFormat format = getPostingsFormatForField(field);
+  
+        if (format == null) {
+          throw new IllegalStateException("invalid null PostingsFormat for field=\"" + field + "\"");
+        }
+        String formatName = format.getName();
+      
+        FieldsGroup group = formatToGroups.get(format);
+        if (group == null) {
+          // First time we are seeing this format; create a
+          // new instance
+
+          // bump the suffix
+          Integer suffix = suffixes.get(formatName);
+          if (suffix == null) {
+            suffix = 0;
+          } else {
+            suffix = suffix + 1;
+          }
+          suffixes.put(formatName, suffix);
+
+          String segmentSuffix = getFullSegmentSuffix(field,
+                                                      writeState.segmentSuffix,
+                                                      getSuffix(formatName, Integer.toString(suffix)));
+          group = new FieldsGroup();
+          group.state = new SegmentWriteState(writeState, segmentSuffix);
+          group.suffix = suffix;
+          formatToGroups.put(format, group);
+        } else {
+          // we've already seen this format, so just grab its suffix
+          assert suffixes.containsKey(formatName);
+        }
+
+        group.fields.add(field);
+
+        String previousValue = fieldInfo.putAttribute(PER_FIELD_FORMAT_KEY, formatName);
+        assert previousValue == null;
+
+        previousValue = fieldInfo.putAttribute(PER_FIELD_SUFFIX_KEY, Integer.toString(group.suffix));
+        assert previousValue == null;
+      }
+
+      // Second pass: write postings
+      for(Map.Entry<PostingsFormat,FieldsGroup> ent : formatToGroups.entrySet()) {
+        PostingsFormat format = ent.getKey();
+        final FieldsGroup group = ent.getValue();
+
+        // Exposes only the fields from this group:
+        Fields maskedFields = new FilterFields(fields) {
+            @Override
+            public Iterator<String> iterator() {
+              return group.fields.iterator();
+            }
+          };
+
+        format.fieldsConsumer(group.state).write(maskedFields);
+      }
+    }
+  }
 
   private class FieldsReader extends FieldsProducer {
 
@@ -239,6 +249,12 @@ public abstract class PerFieldPostingsFo
   }
 
   @Override
+  public final FieldsConsumer fieldsConsumer(SegmentWriteState state)
+      throws IOException {
+    return new FieldsWriter(state);
+  }
+
+  @Override
   public final FieldsProducer fieldsProducer(SegmentReadState state)
       throws IOException {
     return new FieldsReader(state);
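
Context for this file: the write path is now "pull" based. FieldsWriter.write
groups fields by the PostingsFormat returned from getPostingsFormatForField,
then hands each format a FilterFields view that masks out the other groups'
fields. A hypothetical subclass showing how that hook is typically wired
(the format and field names here are illustrative only):

    PostingsFormat pf = new PerFieldPostingsFormat() {
      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        // Route an id-like field to the RAM-resident format;
        // everything else uses the default:
        if ("id".equals(field)) {
          return PostingsFormat.forName("Memory");
        } else {
          return PostingsFormat.forName("Lucene41");
        }
      }
    };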

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java Thu Sep 19 20:57:09 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
-import java.util.Comparator;
 
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IntsRef;
@@ -65,7 +64,6 @@ class AutomatonTermsEnum extends Filtere
   // of terms where we should simply do sequential reads instead.
   private boolean linear = false;
   private final BytesRef linearUpperBound = new BytesRef(10);
-  private final Comparator<BytesRef> termComp;
 
   /**
    * Construct an enumerator based upon an automaton, enumerating the specified
@@ -85,8 +83,6 @@ class AutomatonTermsEnum extends Filtere
 
     // used for path tracking, where each bit is a numbered state.
     visited = new long[runAutomaton.getSize()];
-
-    termComp = getComparator();
   }
   
   /**
@@ -99,10 +95,10 @@ class AutomatonTermsEnum extends Filtere
       if (runAutomaton.run(term.bytes, term.offset, term.length))
         return linear ? AcceptStatus.YES : AcceptStatus.YES_AND_SEEK;
       else
-        return (linear && termComp.compare(term, linearUpperBound) < 0) ? 
+        return (linear && term.compareTo(linearUpperBound) < 0) ? 
             AcceptStatus.NO : AcceptStatus.NO_AND_SEEK;
     } else {
-      return (linear && termComp.compare(term, linearUpperBound) < 0) ? 
+      return (linear && term.compareTo(linearUpperBound) < 0) ? 
           AcceptStatus.NO : AcceptStatus.NO_AND_SEEK;
     }
   }
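
Context for this hunk: aside from dropping the cached comparator, the logic
is unchanged. In linear mode the enum scans terms sequentially while they
stay below linearUpperBound; once a rejected term reaches that bound,
NO_AND_SEEK tells FilteredTermsEnum to compute the next valid term and seek
instead. Condensed, the rejected-term decision is:

    // After the automaton rejects a term:
    if (linear && term.compareTo(linearUpperBound) < 0) {
      return AcceptStatus.NO;          // keep reading sequentially
    } else {
      return AcceptStatus.NO_AND_SEEK; // jump ahead via nextSeekTerm()
    }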

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Thu Sep 19 20:57:09 2013
@@ -761,8 +761,6 @@ public class CheckIndex {
       
       BytesRef lastTerm = null;
       
-      Comparator<BytesRef> termComp = terms.getComparator();
-      
       long sumTotalTermFreq = 0;
       long sumDocFreq = 0;
       FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
@@ -780,7 +778,7 @@ public class CheckIndex {
         if (lastTerm == null) {
           lastTerm = BytesRef.deepCopyOf(term);
         } else {
-          if (termComp.compare(lastTerm, term) >= 0) {
+          if (lastTerm.compareTo(term) >= 0) {
             throw new RuntimeException("terms out of order: lastTerm=" + lastTerm + " term=" + term);
           }
           lastTerm.copyBytes(term);

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java Thu Sep 19 20:57:09 2013
@@ -20,7 +20,6 @@ package org.apache.lucene.index;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Comparator;
 import java.util.List;
 
 import org.apache.lucene.codecs.PostingsFormat; // javadocs
@@ -611,11 +610,6 @@ public class DocTermOrds {
       termsEnum = reader.fields().terms(field).iterator(null);
     }
 
-    @Override
-    public Comparator<BytesRef> getComparator() {
-      return termsEnum.getComparator();
-    }
-
     @Override    
     public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
       return termsEnum.docs(liveDocs, reuse, flags);

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java Thu Sep 19 20:57:09 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
-import java.util.Comparator;
 import java.util.Iterator;
 
 import org.apache.lucene.search.CachingWrapperFilter;
@@ -98,11 +97,6 @@ public class FilterAtomicReader extends 
     public TermsEnum iterator(TermsEnum reuse) throws IOException {
       return in.iterator(reuse);
     }
-    
-    @Override
-    public Comparator<BytesRef> getComparator() {
-      return in.getComparator();
-    }
 
     @Override
     public long size() throws IOException {
@@ -200,11 +194,6 @@ public class FilterAtomicReader extends 
     public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
       return in.docsAndPositions(liveDocs, reuse, flags);
     }
-
-    @Override
-    public Comparator<BytesRef> getComparator() {
-      return in.getComparator();
-    }
   }
 
   /** Base class for filtering {@link DocsEnum} implementations. */

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java Thu Sep 19 20:57:09 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
-import java.util.Comparator;
 
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.AttributeSource;
@@ -28,7 +27,7 @@ import org.apache.lucene.util.Bits;
  * Abstract class for enumerating a subset of all terms. 
  * 
  * <p>Term enumerations are always ordered by
- * {@link #getComparator}.  Each term in the enumeration is
+ * {@link BytesRef#compareTo}.  Each term in the enumeration is
  * greater than all that precede it.</p>
  * <p><em>Please note:</em> Consumers of this enum cannot
  * call {@code seek()}, it is forward only; it throws
@@ -135,11 +134,6 @@ public abstract class FilteredTermsEnum 
   }
 
   @Override
-  public Comparator<BytesRef> getComparator() {
-    return tenum.getComparator();
-  }
-    
-  @Override
   public int docFreq() throws IOException {
     return tenum.docFreq();
   }
@@ -221,7 +215,7 @@ public abstract class FilteredTermsEnum 
         final BytesRef t = nextSeekTerm(actualTerm);
         //System.out.println("  seek to t=" + (t == null ? "null" : t.utf8ToString()) + " tenum=" + tenum);
         // Make sure we always seek forward:
-        assert actualTerm == null || t == null || getComparator().compare(t, actualTerm) > 0: "curTerm=" + actualTerm + " seekTerm=" + t;
+        assert actualTerm == null || t == null || t.compareTo(actualTerm) > 0: "curTerm=" + actualTerm + " seekTerm=" + t;
         if (t == null || tenum.seekCeil(t) == SeekStatus.END) {
           // no more terms to seek to or enum exhausted
           //System.out.println("  return null");

Added: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java?rev=1524840&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java (added)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java Thu Sep 19 20:57:09 2013
@@ -0,0 +1,523 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.FreqProxTermsWriterPerField.FreqProxPostingsArray;
+import org.apache.lucene.util.AttributeSource; // javadocs
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+
+/** Implements a limited (iterators only, no stats) {@link
+ *  Fields} interface over the in-RAM buffered
+ *  fields/terms/postings, used to flush postings through the
+ *  PostingsFormat. */
+
+class FreqProxFields extends Fields {
+  final Map<String,FreqProxTermsWriterPerField> fields = new LinkedHashMap<String,FreqProxTermsWriterPerField>();
+
+  public FreqProxFields(List<FreqProxTermsWriterPerField> fieldList) {
+    // NOTE: fields are already sorted by field name
+    for(FreqProxTermsWriterPerField field : fieldList) {
+      fields.put(field.fieldInfo.name, field);
+    }
+  }
+
+  public Iterator<String> iterator() {
+    return fields.keySet().iterator();
+  }
+
+  @Override
+  public Terms terms(String field) throws IOException {
+    FreqProxTermsWriterPerField perField = fields.get(field);
+    return perField == null ? null : new FreqProxTerms(perField);
+  }
+
+  @Override
+  public int size() {
+    //return fields.size();
+    throw new UnsupportedOperationException();
+  }
+
+  private static class FreqProxTerms extends Terms {
+    final FreqProxTermsWriterPerField terms;
+
+    public FreqProxTerms(FreqProxTermsWriterPerField terms) {
+      this.terms = terms;
+    }
+
+    @Override
+    public TermsEnum iterator(TermsEnum reuse) {
+      FreqProxTermsEnum termsEnum;
+      if (reuse instanceof FreqProxTermsEnum && ((FreqProxTermsEnum) reuse).terms == this.terms) {
+        termsEnum = (FreqProxTermsEnum) reuse;
+      } else {
+        termsEnum = new FreqProxTermsEnum(terms);
+      }
+      termsEnum.reset();
+      return termsEnum;
+    }
+
+    @Override
+    public long size() {
+      //return terms.termsHashPerField.bytesHash.size();
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public long getSumTotalTermFreq() {
+      //return terms.sumTotalTermFreq;
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public long getSumDocFreq() {
+      //return terms.sumDocFreq;
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int getDocCount() {
+      //return terms.docCount;
+      throw new UnsupportedOperationException();
+    }
+  
+    @Override
+    public boolean hasOffsets() {
+      // NOTE: the in-memory buffer may have indexed offsets
+      // because that's what FieldInfo said when we started,
+      // but during indexing this may have been downgraded:
+      return terms.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;      
+    }
+  
+    @Override
+    public boolean hasPositions() {
+      // NOTE: the in-memory buffer may have indexed positions
+      // because that's what FieldInfo said when we started,
+      // but during indexing this may have been downgraded:
+      return terms.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+    }
+  
+    @Override
+    public boolean hasPayloads() {
+      return terms.hasPayloads;
+    }
+  }
+
+  private static class FreqProxTermsEnum extends TermsEnum {
+    final FreqProxTermsWriterPerField terms;
+    final int[] sortedTermIDs;
+    final FreqProxPostingsArray postingsArray;
+    final BytesRef scratch = new BytesRef();
+    final int numTerms;
+    int ord;
+
+    public FreqProxTermsEnum(FreqProxTermsWriterPerField terms) {
+      this.terms = terms;
+      this.numTerms = terms.termsHashPerField.bytesHash.size();
+      sortedTermIDs = terms.sortedTermIDs;
+      assert sortedTermIDs != null;
+      postingsArray = (FreqProxPostingsArray) terms.termsHashPerField.postingsArray;
+    }
+
+    public void reset() {
+      ord = -1;
+    }
+
+    @Override
+    public SeekStatus seekCeil(BytesRef text) {
+
+      // TODO: we could instead keep the BytesRefHash
+      // intact so this is a hash lookup
+
+      // binary search:
+      int lo = 0;
+      int hi = numTerms - 1;
+      while (hi >= lo) {
+        int mid = (lo + hi) >>> 1;
+        int textStart = postingsArray.textStarts[sortedTermIDs[mid]];
+        terms.termsHashPerField.bytePool.setBytesRef(scratch, textStart);
+        int cmp = scratch.compareTo(text);
+        if (cmp < 0) {
+          lo = mid + 1;
+        } else if (cmp > 0) {
+          hi = mid - 1;
+        } else {
+          // found:
+          ord = mid;
+          return SeekStatus.FOUND;
+        }
+      }
+
+      // not found; lo is the insertion point:
+      ord = lo;
+      if (ord >= numTerms) {
+        return SeekStatus.END;
+      } else {
+        // position the enum on the ceiling term:
+        int textStart = postingsArray.textStarts[sortedTermIDs[ord]];
+        terms.termsHashPerField.bytePool.setBytesRef(scratch, textStart);
+        return SeekStatus.NOT_FOUND;
+      }
+    }
+
+    @Override
+    public void seekExact(long ord) {
+      this.ord = (int) ord;
+      int textStart = postingsArray.textStarts[sortedTermIDs[this.ord]];
+      terms.termsHashPerField.bytePool.setBytesRef(scratch, textStart);
+    }
+
+    @Override
+    public BytesRef next() {
+      ord++;
+      if (ord >= numTerms) {
+        return null;
+      } else {
+        int textStart = postingsArray.textStarts[sortedTermIDs[ord]];
+        terms.termsHashPerField.bytePool.setBytesRef(scratch, textStart);
+        return scratch;
+      }
+    }
+
+    @Override
+    public BytesRef term() {
+      return scratch;
+    }
+
+    @Override
+    public long ord() {
+      return ord;
+    }
+
+    @Override
+    public int docFreq() {
+      // We do not store this per-term, and we cannot
+      // implement this at merge time w/o an added pass
+      // through the postings:
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public long totalTermFreq() {
+      // We do not store this per-term, and we cannot
+      // implement this at merge time w/o an added pass
+      // through the postings:
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) {
+      if (liveDocs != null) {
+        throw new IllegalArgumentException("liveDocs must be null");
+      }
+
+      FreqProxDocsEnum docsEnum;
+
+      if (!terms.hasFreq && (flags & DocsEnum.FLAG_FREQS) != 0) {
+        // Caller wants freqs but we didn't index them;
+        // don't lie:
+        throw new IllegalArgumentException("did not index freq");
+      }
+
+      if (reuse instanceof FreqProxDocsEnum) {
+        docsEnum = (FreqProxDocsEnum) reuse;
+        if (docsEnum.postingsArray != postingsArray) {
+          docsEnum = new FreqProxDocsEnum(terms, postingsArray);
+        }
+      } else {
+        docsEnum = new FreqProxDocsEnum(terms, postingsArray);
+      }
+      docsEnum.reset(sortedTermIDs[ord]);
+      return docsEnum;
+    }
+
+    @Override
+    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) {
+      if (liveDocs != null) {
+        throw new IllegalArgumentException("liveDocs must be null");
+      }
+      FreqProxDocsAndPositionsEnum posEnum;
+
+      if (!terms.hasProx) {
+        // Caller wants positions but we didn't index them;
+        // don't lie:
+        throw new IllegalArgumentException("did not index positions");
+      }
+
+      if (!terms.hasOffsets && (flags & DocsAndPositionsEnum.FLAG_OFFSETS) != 0) {
+        // Caller wants offsets but we didn't index them;
+        // don't lie:
+        throw new IllegalArgumentException("did not index offsets");
+      }
+
+      if (reuse instanceof FreqProxDocsAndPositionsEnum) {
+        posEnum = (FreqProxDocsAndPositionsEnum) reuse;
+        if (posEnum.postingsArray != postingsArray) {
+          posEnum = new FreqProxDocsAndPositionsEnum(terms, postingsArray);
+        }
+      } else {
+        posEnum = new FreqProxDocsAndPositionsEnum(terms, postingsArray);
+      }
+      posEnum.reset(sortedTermIDs[ord]);
+      return posEnum;
+    }
+
+    /**
+     * Expert: Returns the TermsEnum's internal state to position the TermsEnum
+     * without re-seeking the term dictionary.
+     * <p>
+     * NOTE: A seek by {@link TermState} might not capture the
+     * {@link AttributeSource}'s state. Callers must maintain the
+     * {@link AttributeSource} states separately.
+     * 
+     * @see TermState
+     * @see #seekExact(BytesRef, TermState)
+     */
+    public TermState termState() throws IOException {
+      return new TermState() {
+        @Override
+        public void copyFrom(TermState other) {
+          throw new UnsupportedOperationException();
+        }
+      };
+    }
+  }
+
+  private static class FreqProxDocsEnum extends DocsEnum {
+
+    final FreqProxTermsWriterPerField terms;
+    final FreqProxPostingsArray postingsArray;
+    final ByteSliceReader reader = new ByteSliceReader();
+    final boolean readTermFreq;
+    int docID;
+    int freq;
+    boolean ended;
+    int termID;
+
+    public FreqProxDocsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray) {
+      this.terms = terms;
+      this.postingsArray = postingsArray;
+      this.readTermFreq = terms.hasFreq;
+    }
+
+    public void reset(int termID) {
+      this.termID = termID;
+      terms.termsHashPerField.initReader(reader, termID, 0);
+      ended = false;
+      docID = 0;
+    }
+
+    @Override
+    public int docID() {
+      return docID;
+    }
+
+    @Override
+    public int freq() {
+      // Don't lie here ... we don't want codecs writing lots
+      // of wasted 1s into the index:
+      if (!readTermFreq) {
+        throw new IllegalStateException("freq was not indexed");
+      } else {
+        return freq;
+      }
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      if (reader.eof()) {
+        if (ended) {
+          return NO_MORE_DOCS;
+        } else {
+          ended = true;
+          docID = postingsArray.lastDocIDs[termID];
+          if (readTermFreq) {
+            freq = postingsArray.termFreqs[termID];
+          }
+        }
+      } else {
+        int code = reader.readVInt();
+        if (!readTermFreq) {
+          // Stream holds plain docID deltas:
+          docID += code;
+        } else {
+          // Low bit of the delta signals freq == 1; otherwise
+          // the freq follows as a separate vInt:
+          docID += code >>> 1;
+          if ((code & 1) != 0) {
+            freq = 1;
+          } else {
+            freq = reader.readVInt();
+          }
+        }
+
+        assert docID != postingsArray.lastDocIDs[termID];
+      }
+
+      return docID;
+    }
+
+    @Override
+    public int advance(int target) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public long cost() {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  private static class FreqProxDocsAndPositionsEnum extends DocsAndPositionsEnum {
+
+    final FreqProxTermsWriterPerField terms;
+    final FreqProxPostingsArray postingsArray;
+    final ByteSliceReader reader = new ByteSliceReader();
+    final ByteSliceReader posReader = new ByteSliceReader();
+    final boolean readOffsets;
+    int docID;
+    int freq;
+    int pos;
+    int startOffset;
+    int endOffset;
+    int posLeft;
+    int termID;
+    boolean ended;
+    boolean hasPayload;
+    BytesRef payload = new BytesRef();
+
+    public FreqProxDocsAndPositionsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray) {
+      this.terms = terms;
+      this.postingsArray = postingsArray;
+      this.readOffsets = terms.hasOffsets;
+      assert terms.hasProx;
+      assert terms.hasFreq;
+    }
+
+    public void reset(int termID) {
+      this.termID = termID;
+      terms.termsHashPerField.initReader(reader, termID, 0);
+      terms.termsHashPerField.initReader(posReader, termID, 1);
+      ended = false;
+      docID = 0;
+      posLeft = 0;
+    }
+
+    @Override
+    public int docID() {
+      return docID;
+    }
+
+    @Override
+    public int freq() {
+      return freq;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      while (posLeft != 0) {
+        nextPosition();
+      }
+
+      if (reader.eof()) {
+        if (ended) {
+          return NO_MORE_DOCS;
+        } else {
+          ended = true;
+          docID = postingsArray.lastDocIDs[termID];
+          freq = postingsArray.termFreqs[termID];
+        }
+      } else {
+        int code = reader.readVInt();
+        docID += code >>> 1;
+        if ((code & 1) != 0) {
+          freq = 1;
+        } else {
+          freq = reader.readVInt();
+        }
+
+        assert docID != postingsArray.lastDocIDs[termID];
+      }
+
+      posLeft = freq;
+      pos = 0;
+      startOffset = 0;
+      return docID;
+    }
+
+    @Override
+    public int advance(int target) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public long cost() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int nextPosition() throws IOException {
+      assert posLeft > 0;
+      posLeft--;
+      int code = posReader.readVInt();
+      pos += code >>> 1;
+      if ((code & 1) != 0) {
+        hasPayload = true;
+        // has a payload
+        payload.length = posReader.readVInt();
+        if (payload.bytes.length < payload.length) {
+          payload.grow(payload.length);
+        }
+        posReader.readBytes(payload.bytes, 0, payload.length);
+      } else {
+        hasPayload = false;
+      }
+
+      if (readOffsets) {
+        startOffset += posReader.readVInt();
+        endOffset = startOffset + posReader.readVInt();
+      }
+
+      return pos;
+    }
+
+    @Override
+    public int startOffset() {
+      if (!readOffsets) {
+        throw new IllegalStateException("offsets were not indexed");
+      }
+      return startOffset;
+    }
+
+    @Override
+    public int endOffset() {
+      if (!readOffsets) {
+        throw new IllegalStateException("offsets were not indexed");
+      }
+      return endOffset;
+    }
+
+    @Override
+    public BytesRef getPayload() {
+      if (hasPayload) {
+        return payload;
+      } else {
+        return null;
+      }
+    }
+  }
+}
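
Context for this new file: FreqProxFields exists so that, at flush time, the
codec's FieldsConsumer can pull the in-RAM postings through the same
Fields/Terms/TermsEnum API it sees at merge time. A minimal consumer-side
sketch of that pull pattern over any Fields instance (the helper name is
illustrative):

    void pullPostings(Fields fields) throws IOException {
      for (String field : fields) {                 // field names in order
        Terms terms = fields.terms(field);
        TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docsEnum = null;
        BytesRef term;
        while ((term = termsEnum.next()) != null) { // UTF-8 byte order
          docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
          int doc;
          while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
            // write (term, doc) to the on-disk format here
          }
        }
      }
    }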

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java Thu Sep 19 20:57:09 2013
@@ -19,19 +19,62 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CollectionUtil;
-import org.apache.lucene.util.IOUtils;
 
 final class FreqProxTermsWriter extends TermsHashConsumer {
 
   @Override
   void abort() {}
 
+  private void applyDeletes(SegmentWriteState state, Fields fields) throws IOException {
+    // Process any pending Term deletes for this newly
+    // flushed segment:
+    if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
+      Map<Term,Integer> segDeletes = state.segDeletes.terms;
+      List<Term> deleteTerms = new ArrayList<Term>(segDeletes.keySet());
+      Collections.sort(deleteTerms);
+      String lastField = null;
+      TermsEnum termsEnum = null;
+      DocsEnum docsEnum = null;
+      for(Term deleteTerm : deleteTerms) {
+        if (deleteTerm.field().equals(lastField) == false) {
+          lastField = deleteTerm.field();
+          Terms terms = fields.terms(lastField);
+          if (terms != null) {
+            termsEnum = terms.iterator(termsEnum);
+          }
+        }
+
+        if (termsEnum != null && termsEnum.seekExact(deleteTerm.bytes())) {
+          docsEnum = termsEnum.docs(null, docsEnum, 0);
+          int delDocLimit = segDeletes.get(deleteTerm);
+          while (true) {
+            int doc = docsEnum.nextDoc();
+            if (doc == DocsEnum.NO_MORE_DOCS) {
+              break;
+            }
+            if (doc < delDocLimit) {
+              if (state.liveDocs == null) {
+                state.liveDocs = state.segmentInfo.getCodec().liveDocsFormat().newLiveDocs(state.segmentInfo.getDocCount());
+              }
+              if (state.liveDocs.get(doc)) {
+                state.delCountOnFlush++;
+                state.liveDocs.clear(doc);
+              }
+            } else {
+              break;
+            }
+          }
+        }
+      }
+    }
+  }
+
   // TODO: would be nice to factor out more of this, eg the
   // FreqProxFieldMergeState, and code to visit all Fields
   // under the same FieldInfo together, up into TermsHash*.
@@ -47,63 +90,20 @@ final class FreqProxTermsWriter extends 
     for (TermsHashConsumerPerField f : fieldsToFlush.values()) {
       final FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField) f;
       if (perField.termsHashPerField.bytesHash.size() > 0) {
+        perField.sortPostings();
+        assert perField.fieldInfo.isIndexed();
         allFields.add(perField);
       }
     }
 
-    final int numAllFields = allFields.size();
-
     // Sort by field name
     CollectionUtil.introSort(allFields);
 
-    final FieldsConsumer consumer = state.segmentInfo.getCodec().postingsFormat().fieldsConsumer(state);
+    Fields fields = new FreqProxFields(allFields);
 
-    boolean success = false;
+    applyDeletes(state, fields);
 
-    try {
-      TermsHash termsHash = null;
-      
-      /*
-    Current writer chain:
-      FieldsConsumer
-        -> IMPL: FormatPostingsTermsDictWriter
-          -> TermsConsumer
-            -> IMPL: FormatPostingsTermsDictWriter.TermsWriter
-              -> DocsConsumer
-                -> IMPL: FormatPostingsDocsWriter
-                  -> PositionsConsumer
-                    -> IMPL: FormatPostingsPositionsWriter
-       */
-      
-      for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++) {
-        final FieldInfo fieldInfo = allFields.get(fieldNumber).fieldInfo;
-        
-        final FreqProxTermsWriterPerField fieldWriter = allFields.get(fieldNumber);
-
-        // If this field has postings then add them to the
-        // segment
-        fieldWriter.flush(fieldInfo.name, consumer, state);
-        
-        TermsHashPerField perField = fieldWriter.termsHashPerField;
-        assert termsHash == null || termsHash == perField.termsHash;
-        termsHash = perField.termsHash;
-        int numPostings = perField.bytesHash.size();
-        perField.reset();
-        perField.shrinkHash(numPostings);
-        fieldWriter.reset();
-      }
-      
-      if (termsHash != null) {
-        termsHash.reset();
-      }
-      success = true;
-    } finally {
-      if (success) {
-        IOUtils.close(consumer);
-      } else {
-        IOUtils.closeWhileHandlingException(consumer);
-      }
-    }
+    state.segmentInfo.getCodec().postingsFormat().fieldsConsumer(state).write(fields);
   }
 
   BytesRef payload;
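
Context for this file: the new applyDeletes resolves buffered
deleteDocuments(Term) calls against the just-flushed segment. Each delete
term maps to a docIDUpto limit, and only documents added before the delete
(doc < delDocLimit) are marked deleted, so a re-add of the same term
survives. A hypothetical timeline, where w is an open IndexWriter and doc()
is an illustrative helper building a single-field document:

    w.addDocument(doc("id", "1"));          // in-segment docID 0
    w.deleteDocuments(new Term("id", "1")); // limit for id:1 is now 1
    w.addDocument(doc("id", "1"));          // docID 1, survives (1 >= limit)
    w.commit();                             // flush marks only docID 0 deleted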

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java Thu Sep 19 20:57:09 2013
@@ -17,19 +17,10 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
-import java.io.IOException;
-import java.util.Comparator;
-import java.util.Map;
-
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.codecs.FieldsConsumer;
-import org.apache.lucene.codecs.PostingsConsumer;
-import org.apache.lucene.codecs.TermStats;
-import org.apache.lucene.codecs.TermsConsumer;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.RamUsageEstimator;
 
 // TODO: break into separate freq and prox writers as
@@ -42,11 +33,16 @@ final class FreqProxTermsWriterPerField 
   final FieldInfo fieldInfo;
   final DocumentsWriterPerThread.DocState docState;
   final FieldInvertState fieldState;
-  private boolean hasFreq;
-  private boolean hasProx;
-  private boolean hasOffsets;
+  boolean hasFreq;
+  boolean hasProx;
+  boolean hasOffsets;
   PayloadAttribute payloadAttribute;
   OffsetAttribute offsetAttribute;
+  long sumTotalTermFreq;
+  long sumDocFreq;
+
+  // How many docs have this field:
+  int docCount;
 
   public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriter parent, FieldInfo fieldInfo) {
     this.termsHashPerField = termsHashPerField;
@@ -68,6 +64,12 @@ final class FreqProxTermsWriterPerField 
 
   @Override
   void finish() {
+    sumDocFreq += fieldState.uniqueTermCount;
+    sumTotalTermFreq += fieldState.length;
+    if (fieldState.length > 0) {
+      docCount++;
+    }
+
     if (hasPayloads) {
       fieldInfo.setStorePayloads();
     }
@@ -83,14 +85,6 @@ final class FreqProxTermsWriterPerField 
     return fieldInfo.name.compareTo(other.fieldInfo.name);
   }
 
-  // Called after flush
-  void reset() {
-    // Record, up front, whether our in-RAM format will be
-    // with or without term freqs:
-    setIndexOptions(fieldInfo.getIndexOptions());
-    payloadAttribute = null;
-  }
-
   private void setIndexOptions(IndexOptions indexOptions) {
     if (indexOptions == null) {
       // field could later be updated with indexed=true, so set everything on
@@ -318,233 +312,10 @@ final class FreqProxTermsWriterPerField 
 
   BytesRef payload;
 
-  /* Walk through all unique text tokens (Posting
-   * instances) found in this field and serialize them
-   * into a single RAM segment. */
-  void flush(String fieldName, FieldsConsumer consumer,  final SegmentWriteState state)
-    throws IOException {
-
-    if (!fieldInfo.isIndexed()) {
-      return; // nothing to flush, don't bother the codec with the unindexed field
-    }
-    
-    final TermsConsumer termsConsumer = consumer.addField(fieldInfo);
-    final Comparator<BytesRef> termComp = termsConsumer.getComparator();
-
-    // CONFUSING: this.indexOptions holds the index options
-    // that were current when we first saw this field.  But
-    // it's possible this has changed, eg when other
-    // documents are indexed that cause a "downgrade" of the
-    // IndexOptions.  So we must decode the in-RAM buffer
-    // according to this.indexOptions, but then write the
-    // new segment to the directory according to
-    // currentFieldIndexOptions:
-    final IndexOptions currentFieldIndexOptions = fieldInfo.getIndexOptions();
-    assert currentFieldIndexOptions != null;
-
-    final boolean writeTermFreq = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
-    final boolean writePositions = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
-    final boolean writeOffsets = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
-
-    final boolean readTermFreq = this.hasFreq;
-    final boolean readPositions = this.hasProx;
-    final boolean readOffsets = this.hasOffsets;
-
-    //System.out.println("flush readTF=" + readTermFreq + " readPos=" + readPositions + " readOffs=" + readOffsets);
-
-    // Make sure FieldInfo.update is working correctly!:
-    assert !writeTermFreq || readTermFreq;
-    assert !writePositions || readPositions;
-    assert !writeOffsets || readOffsets;
-
-    assert !writeOffsets || writePositions;
-
-    final Map<Term,Integer> segDeletes;
-    if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
-      segDeletes = state.segDeletes.terms;
-    } else {
-      segDeletes = null;
-    }
-
-    final int[] termIDs = termsHashPerField.sortPostings(termComp);
-    final int numTerms = termsHashPerField.bytesHash.size();
-    final BytesRef text = new BytesRef();
-    final FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
-    final ByteSliceReader freq = new ByteSliceReader();
-    final ByteSliceReader prox = new ByteSliceReader();
-
-    FixedBitSet visitedDocs = new FixedBitSet(state.segmentInfo.getDocCount());
-    long sumTotalTermFreq = 0;
-    long sumDocFreq = 0;
-
-    Term protoTerm = new Term(fieldName);
-    for (int i = 0; i < numTerms; i++) {
-      final int termID = termIDs[i];
-      //System.out.println("term=" + termID);
-      // Get BytesRef
-      final int textStart = postings.textStarts[termID];
-      termsHashPerField.bytePool.setBytesRef(text, textStart);
-
-      termsHashPerField.initReader(freq, termID, 0);
-      if (readPositions || readOffsets) {
-        termsHashPerField.initReader(prox, termID, 1);
-      }
-
-      // TODO: really TermsHashPerField should take over most
-      // of this loop, including merge sort of terms from
-      // multiple threads and interacting with the
-      // TermsConsumer, only calling out to us (passing us the
-      // DocsConsumer) to handle delivery of docs/positions
-
-      final PostingsConsumer postingsConsumer = termsConsumer.startTerm(text);
-
-      final int delDocLimit;
-      if (segDeletes != null) {
-        protoTerm.bytes = text;
-        final Integer docIDUpto = segDeletes.get(protoTerm);
-        if (docIDUpto != null) {
-          delDocLimit = docIDUpto;
-        } else {
-          delDocLimit = 0;
-        }
-      } else {
-        delDocLimit = 0;
-      }
-
-      // Now termStates has numToMerge FieldMergeStates
-      // which all share the same term.  Now we must
-      // interleave the docID streams.
-      int docFreq = 0;
-      long totalTermFreq = 0;
-      int docID = 0;
-
-      while(true) {
-        //System.out.println("  cycle");
-        final int termFreq;
-        if (freq.eof()) {
-          if (postings.lastDocCodes[termID] != -1) {
-            // Return last doc
-            docID = postings.lastDocIDs[termID];
-            if (readTermFreq) {
-              termFreq = postings.termFreqs[termID];
-            } else {
-              termFreq = -1;
-            }
-            postings.lastDocCodes[termID] = -1;
-          } else {
-            // EOF
-            break;
-          }
-        } else {
-          final int code = freq.readVInt();
-          if (!readTermFreq) {
-            docID += code;
-            termFreq = -1;
-          } else {
-            docID += code >>> 1;
-            if ((code & 1) != 0) {
-              termFreq = 1;
-            } else {
-              termFreq = freq.readVInt();
-            }
-          }
-
-          assert docID != postings.lastDocIDs[termID];
-        }
-
-        docFreq++;
-        assert docID < state.segmentInfo.getDocCount(): "doc=" + docID + " maxDoc=" + state.segmentInfo.getDocCount();
-
-        // NOTE: we could check here if the docID was
-        // deleted, and skip it.  However, this is somewhat
-        // dangerous because it can yield non-deterministic
-        // behavior since we may see the docID before we see
-        // the term that caused it to be deleted.  This
-        // would mean some (but not all) of its postings may
-        // make it into the index, which'd alter the docFreq
-        // for those terms.  We could fix this by doing two
-        // passes, ie first sweep marks all del docs, and
-        // 2nd sweep does the real flush, but I suspect
-        // that'd add too much time to flush.
-        visitedDocs.set(docID);
-        postingsConsumer.startDoc(docID, writeTermFreq ? termFreq : -1);
-        if (docID < delDocLimit) {
-          // Mark it deleted.  TODO: we could also skip
-          // writing its postings; this would be
-          // deterministic (just for this Term's docs).
-          
-          // TODO: can we do this reach-around in a cleaner way????
-          if (state.liveDocs == null) {
-            state.liveDocs = docState.docWriter.codec.liveDocsFormat().newLiveDocs(state.segmentInfo.getDocCount());
-          }
-          if (state.liveDocs.get(docID)) {
-            state.delCountOnFlush++;
-            state.liveDocs.clear(docID);
-          }
-        }
-
-        totalTermFreq += termFreq;
-        
-        // Carefully copy over the prox + payload info,
-        // changing the format to match Lucene's segment
-        // format.
-
-        if (readPositions || readOffsets) {
-          // we did record positions (& maybe payload) and/or offsets
-          int position = 0;
-          int offset = 0;
-          for(int j=0;j<termFreq;j++) {
-            final BytesRef thisPayload;
-
-            if (readPositions) {
-              final int code = prox.readVInt();
-              position += code >>> 1;
-
-              if ((code & 1) != 0) {
-
-                // This position has a payload
-                final int payloadLength = prox.readVInt();
-
-                if (payload == null) {
-                  payload = new BytesRef();
-                  payload.bytes = new byte[payloadLength];
-                } else if (payload.bytes.length < payloadLength) {
-                  payload.grow(payloadLength);
-                }
-
-                prox.readBytes(payload.bytes, 0, payloadLength);
-                payload.length = payloadLength;
-                thisPayload = payload;
-
-              } else {
-                thisPayload = null;
-              }
-
-              if (readOffsets) {
-                final int startOffset = offset + prox.readVInt();
-                final int endOffset = startOffset + prox.readVInt();
-                if (writePositions) {
-                  if (writeOffsets) {
-                    assert startOffset >=0 && endOffset >= startOffset : "startOffset=" + startOffset + ",endOffset=" + endOffset + ",offset=" + offset;
-                    postingsConsumer.addPosition(position, thisPayload, startOffset, endOffset);
-                  } else {
-                    postingsConsumer.addPosition(position, thisPayload, -1, -1);
-                  }
-                }
-                offset = startOffset;
-              } else if (writePositions) {
-                postingsConsumer.addPosition(position, thisPayload, -1, -1);
-              }
-            }
-          }
-        }
-        postingsConsumer.finishDoc();
-      }
-      termsConsumer.finishTerm(text, new TermStats(docFreq, writeTermFreq ? totalTermFreq : -1));
-      sumTotalTermFreq += totalTermFreq;
-      sumDocFreq += docFreq;
-    }
+  int[] sortedTermIDs;
 
-    termsConsumer.finish(writeTermFreq ? sumTotalTermFreq : -1, sumDocFreq, visitedDocs.cardinality());
+  void sortPostings() {
+    assert sortedTermIDs == null;
+    sortedTermIDs = termsHashPerField.sortPostings();
   }
 }

Added: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappedMultiFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappedMultiFields.java?rev=1524840&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappedMultiFields.java (added)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappedMultiFields.java Thu Sep 19 20:57:09 2013
@@ -0,0 +1,136 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.util.Bits;
+
+import static org.apache.lucene.index.FilterAtomicReader.FilterFields;
+import static org.apache.lucene.index.FilterAtomicReader.FilterTerms;
+import static org.apache.lucene.index.FilterAtomicReader.FilterTermsEnum;
+
+/** A {@link Fields} implementation that merges multiple
+ *  Fields into one, and maps around deleted documents.
+ *  This is used for merging. */
+
+class MappedMultiFields extends FilterFields {
+  final MergeState mergeState;
+
+  public MappedMultiFields(MergeState mergeState, MultiFields multiFields) {
+    super(multiFields);
+    this.mergeState = mergeState;
+  }
+
+  @Override
+  public Terms terms(String field) throws IOException {
+    MultiTerms terms = (MultiTerms) in.terms(field);
+    if (terms == null) {
+      return null;
+    } else {
+      return new MappedMultiTerms(mergeState, terms);
+    }
+  }
+
+  private static class MappedMultiTerms extends FilterTerms {
+    final MergeState mergeState;
+
+    public MappedMultiTerms(MergeState mergeState, MultiTerms multiTerms) {
+      super(multiTerms);
+      this.mergeState = mergeState;
+    }
+
+    @Override
+    public TermsEnum iterator(TermsEnum reuse) throws IOException {
+      return new MappedMultiTermsEnum(mergeState, (MultiTermsEnum) in.iterator(reuse));
+    }
+
+    @Override
+    public long size() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public long getSumTotalTermFreq() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public long getSumDocFreq() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int getDocCount() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  private static class MappedMultiTermsEnum extends FilterTermsEnum {
+    final MergeState mergeState;
+
+    public MappedMultiTermsEnum(MergeState mergeState, MultiTermsEnum multiTermsEnum) {
+      super(multiTermsEnum);
+      this.mergeState = mergeState;
+    }
+
+    @Override
+    public int docFreq() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public long totalTermFreq() throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
+      if (liveDocs != null) {
+        throw new IllegalArgumentException("liveDocs must be null");
+      }
+      MappingMultiDocsEnum mappingDocsEnum;
+      if (reuse instanceof MappingMultiDocsEnum) {
+        mappingDocsEnum = (MappingMultiDocsEnum) reuse;
+      } else {
+        mappingDocsEnum = new MappingMultiDocsEnum(mergeState);
+      }
+      
+      MultiDocsEnum docsEnum = (MultiDocsEnum) in.docs(liveDocs, mappingDocsEnum.multiDocsEnum, flags);
+      mappingDocsEnum.reset(docsEnum);
+      return mappingDocsEnum;
+    }
+
+    @Override
+    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+      if (liveDocs != null) {
+        throw new IllegalArgumentException("liveDocs must be null");
+      }
+      MappingMultiDocsAndPositionsEnum mappingDocsAndPositionsEnum;
+      if (reuse instanceof MappingMultiDocsAndPositionsEnum) {
+        mappingDocsAndPositionsEnum = (MappingMultiDocsAndPositionsEnum) reuse;
+      } else {
+        mappingDocsAndPositionsEnum = new MappingMultiDocsAndPositionsEnum(mergeState);
+      }
+      
+      MultiDocsAndPositionsEnum docsAndPositionsEnum = (MultiDocsAndPositionsEnum) in.docsAndPositions(liveDocs, mappingDocsAndPositionsEnum.multiDocsAndPositionsEnum, flags);
+      mappingDocsAndPositionsEnum.reset(docsAndPositionsEnum);
+      return mappingDocsAndPositionsEnum;
+    }
+  }
+}

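A note on the "remapping doc IDs around deleted documents" behavior above: at merge time doc IDs are compacted so that deleted documents vanish and live documents are renumbered densely. A minimal, self-contained sketch of that remapping (a hypothetical helper, not the actual MergeState.DocMap API):

    import java.util.Arrays;

    public class DocMapSketch {
      /** Deleted docs map to -1; live docs get dense new IDs. */
      static int[] buildDocMap(boolean[] liveDocs) {
        int[] docMap = new int[liveDocs.length];
        int newDocID = 0;
        for (int docID = 0; docID < liveDocs.length; docID++) {
          docMap[docID] = liveDocs[docID] ? newDocID++ : -1;
        }
        return docMap;
      }

      public static void main(String[] args) {
        // doc 1 is deleted: old IDs 0,2,3 become new IDs 0,1,2
        int[] docMap = buildDocMap(new boolean[] {true, false, true, true});
        System.out.println(Arrays.toString(docMap));  // [0, -1, 1, 2]
      }
    }

This is also why the enums above reject non-null liveDocs: the mapping layer already accounts for deletions, so passing liveDocs through as well would double-filter.
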
Copied: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsAndPositionsEnum.java (from r1522894, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsAndPositionsEnum.java?p2=lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsAndPositionsEnum.java&p1=lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java&r1=1522894&r2=1524840&rev=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsAndPositionsEnum.java Thu Sep 19 20:57:09 2013
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs;
+package org.apache.lucene.index;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -18,9 +18,6 @@ package org.apache.lucene.codecs;
  */
 
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.MergeState;
-import org.apache.lucene.index.MultiDocsAndPositionsEnum;
 import org.apache.lucene.index.MultiDocsAndPositionsEnum.EnumWithSlice;
 
 import java.io.IOException;
@@ -32,7 +29,7 @@ import java.io.IOException;
  * @lucene.experimental
  */
 
-public final class MappingMultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
+final class MappingMultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
   private MultiDocsAndPositionsEnum.EnumWithSlice[] subs;
   int numSubs;
   int upto;
@@ -41,9 +38,11 @@ public final class MappingMultiDocsAndPo
   int currentBase;
   int doc = -1;
   private MergeState mergeState;
+  MultiDocsAndPositionsEnum multiDocsAndPositionsEnum;
 
   /** Sole constructor. */
-  public MappingMultiDocsAndPositionsEnum() {
+  public MappingMultiDocsAndPositionsEnum(MergeState mergeState) {
+    this.mergeState = mergeState;
   }
 
   MappingMultiDocsAndPositionsEnum reset(MultiDocsAndPositionsEnum postingsEnum) {
@@ -51,15 +50,10 @@ public final class MappingMultiDocsAndPo
     this.subs = postingsEnum.getSubs();
     upto = -1;
     current = null;
+    this.multiDocsAndPositionsEnum = postingsEnum;
     return this;
   }
 
-  /** Sets the {@link MergeState}, which is used to re-map
-   *  document IDs. */
-  public void setMergeState(MergeState mergeState) {
-    this.mergeState = mergeState;
-  }
-  
   /** How many sub-readers we are merging.
    *  @see #getSubs */
   public int getNumSubs() {
@@ -103,6 +97,13 @@ public final class MappingMultiDocsAndPo
 
       int doc = current.nextDoc();
       if (doc != NO_MORE_DOCS) {
+
+        mergeState.checkAbortCount++;
+        if (mergeState.checkAbortCount > 60000) {
+          mergeState.checkAbort.work(mergeState.checkAbortCount/5.0);
+          mergeState.checkAbortCount = 0;
+        }
+
         // compact deletions
         doc = currentMap.get(doc);
         if (doc == -1) {

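The checkAbortCount increments added above amortize merge-abort checks: instead of calling checkAbort.work for every mapped document, the enum batches roughly 60000 documents per check and credits the batch as count/5.0 work units. A standalone sketch of the same pattern, with a hypothetical CheckAbort callback standing in for MergeState's:

    public class PeriodicAbortCheckSketch {
      /** Hypothetical stand-in for MergeState.checkAbort. */
      interface CheckAbort {
        void work(double units);  // may throw to abort the merge
      }

      private final CheckAbort checkAbort;
      private int checkAbortCount;

      PeriodicAbortCheckSketch(CheckAbort checkAbort) {
        this.checkAbort = checkAbort;
      }

      /** Called once per mapped doc; pays the abort-check cost rarely. */
      void onDoc() {
        checkAbortCount++;
        if (checkAbortCount > 60000) {
          checkAbort.work(checkAbortCount / 5.0);
          checkAbortCount = 0;
        }
      }
    }
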
Copied: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsEnum.java (from r1522894, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsEnum.java?p2=lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsEnum.java&p1=lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java&r1=1522894&r2=1524840&rev=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MappingMultiDocsEnum.java Thu Sep 19 20:57:09 2013
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs;
+package org.apache.lucene.index;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -17,9 +17,6 @@ package org.apache.lucene.codecs;
  * limitations under the License.
  */
 
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.MergeState;
-import org.apache.lucene.index.MultiDocsEnum;
 import org.apache.lucene.index.MultiDocsEnum.EnumWithSlice;
 
 import java.io.IOException;
@@ -31,7 +28,7 @@ import java.io.IOException;
  * @lucene.experimental
  */
 
-public final class MappingMultiDocsEnum extends DocsEnum {
+final class MappingMultiDocsEnum extends DocsEnum {
   private MultiDocsEnum.EnumWithSlice[] subs;
   int numSubs;
   int upto;
@@ -39,26 +36,23 @@ public final class MappingMultiDocsEnum 
   DocsEnum current;
   int currentBase;
   int doc = -1;
-  private MergeState mergeState;
+  private final MergeState mergeState;
+  MultiDocsEnum multiDocsEnum;
 
   /** Sole constructor. */
-  public MappingMultiDocsEnum() {
+  public MappingMultiDocsEnum(MergeState mergeState) {
+    this.mergeState = mergeState;
   }
 
   MappingMultiDocsEnum reset(MultiDocsEnum docsEnum) {
     this.numSubs = docsEnum.getNumSubs();
     this.subs = docsEnum.getSubs();
+    this.multiDocsEnum = docsEnum;
     upto = -1;
     current = null;
     return this;
   }
 
-  /** Sets the {@link MergeState}, which is used to re-map
-   *  document IDs. */
-  public void setMergeState(MergeState mergeState) {
-    this.mergeState = mergeState;
-  }
-  
   /** How many sub-readers we are merging.
    *  @see #getSubs */
   public int getNumSubs() {
@@ -103,6 +97,13 @@ public final class MappingMultiDocsEnum 
 
       int doc = current.nextDoc();
       if (doc != NO_MORE_DOCS) {
+
+        mergeState.checkAbortCount++;
+        if (mergeState.checkAbortCount > 60000) {
+          mergeState.checkAbort.work(mergeState.checkAbortCount/5.0);
+          mergeState.checkAbortCount = 0;
+        }
+
         // compact deletions
         doc = currentMap.get(doc);
         if (doc == -1) {

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MergeState.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MergeState.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MergeState.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MergeState.java Thu Sep 19 20:57:09 2013
@@ -151,6 +151,10 @@ public class MergeState {
   /** InfoStream for debugging messages. */
   public final InfoStream infoStream;
 
+  /** Counter used for periodic calls to checkAbort
+   * @lucene.internal */
+  public int checkAbortCount;
+
   // TODO: get rid of this? it tells you which segments are 'aligned' (e.g. for bulk merging)
   // but is this really so expensive to compute again in different components, versus once in SM?
 

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java Thu Sep 19 20:57:09 2013
@@ -19,7 +19,6 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Comparator;
 import java.util.List;
 
 import org.apache.lucene.util.BytesRef;
@@ -36,7 +35,6 @@ import org.apache.lucene.util.automaton.
 public final class MultiTerms extends Terms {
   private final Terms[] subs;
   private final ReaderSlice[] subSlices;
-  private final Comparator<BytesRef> termComp;
   private final boolean hasOffsets;
   private final boolean hasPositions;
   private final boolean hasPayloads;
@@ -51,28 +49,16 @@ public final class MultiTerms extends Te
     this.subs = subs;
     this.subSlices = subSlices;
     
-    Comparator<BytesRef> _termComp = null;
     assert subs.length > 0 : "inefficient: don't use MultiTerms over one sub";
     boolean _hasOffsets = true;
     boolean _hasPositions = true;
     boolean _hasPayloads = false;
     for(int i=0;i<subs.length;i++) {
-      if (_termComp == null) {
-        _termComp = subs[i].getComparator();
-      } else {
-        // We cannot merge sub-readers that have
-        // different TermComps
-        final Comparator<BytesRef> subTermComp = subs[i].getComparator();
-        if (subTermComp != null && !subTermComp.equals(_termComp)) {
-          throw new IllegalStateException("sub-readers have different BytesRef.Comparators; cannot merge");
-        }
-      }
       _hasOffsets &= subs[i].hasOffsets();
       _hasPositions &= subs[i].hasPositions();
       _hasPayloads |= subs[i].hasPayloads();
     }
 
-    termComp = _termComp;
     hasOffsets = _hasOffsets;
     hasPositions = _hasPositions;
     hasPayloads = hasPositions && _hasPayloads; // if all subs have pos, and at least one has payloads.
@@ -158,11 +144,6 @@ public final class MultiTerms extends Te
   }
 
   @Override
-  public Comparator<BytesRef> getComparator() {
-    return termComp;
-  }
-
-  @Override
   public boolean hasOffsets() {
     return hasOffsets;
   }

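Apart from dropping the comparator, the constructor loop above folds the per-sub flags: offsets and positions must be supported by every sub (AND), payloads by at least one (OR), and payloads are then masked by positions since payloads cannot exist without them. The same folding in isolation (SubFlags is a hypothetical stand-in for the three Terms accessors):

    public class FlagsFoldSketch {
      interface SubFlags {
        boolean hasOffsets();
        boolean hasPositions();
        boolean hasPayloads();
      }

      static String fold(SubFlags[] subs) {
        boolean hasOffsets = true;
        boolean hasPositions = true;
        boolean hasPayloads = false;
        for (SubFlags sub : subs) {
          hasOffsets &= sub.hasOffsets();    // every sub must have them
          hasPositions &= sub.hasPositions();
          hasPayloads |= sub.hasPayloads();  // one sub is enough...
        }
        hasPayloads &= hasPositions;         // ...if all subs have positions
        return "offsets=" + hasOffsets + " positions=" + hasPositions
            + " payloads=" + hasPayloads;
      }
    }
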
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java Thu Sep 19 20:57:09 2013
@@ -23,7 +23,6 @@ import org.apache.lucene.util.Bits;
 
 import java.io.IOException;
 import java.util.Arrays;
-import java.util.Comparator;
 
 /**
  * Exposes {@link TermsEnum} API, merged from {@link TermsEnum} API of sub-segments.
@@ -47,7 +46,6 @@ public final class MultiTermsEnum extend
   private int numTop;
   private int numSubs;
   private BytesRef current;
-  private Comparator<BytesRef> termComp;
 
   static class TermsEnumIndex {
     public final static TermsEnumIndex[] EMPTY_ARRAY = new TermsEnumIndex[0];
@@ -95,36 +93,18 @@ public final class MultiTermsEnum extend
     return current;
   }
 
-  @Override
-  public Comparator<BytesRef> getComparator() {
-    return termComp;
-  }
-
   /** The terms array must be newly created TermsEnum, ie
    *  {@link TermsEnum#next} has not yet been called. */
   public TermsEnum reset(TermsEnumIndex[] termsEnumsIndex) throws IOException {
     assert termsEnumsIndex.length <= top.length;
     numSubs = 0;
     numTop = 0;
-    termComp = null;
     queue.clear();
     for(int i=0;i<termsEnumsIndex.length;i++) {
 
       final TermsEnumIndex termsEnumIndex = termsEnumsIndex[i];
       assert termsEnumIndex != null;
 
-      // init our term comp
-      if (termComp == null) {
-        queue.termComp = termComp = termsEnumIndex.termsEnum.getComparator();
-      } else {
-        // We cannot merge sub-readers that have
-        // different TermComps
-        final Comparator<BytesRef> subTermComp = termsEnumIndex.termsEnum.getComparator();
-        if (subTermComp != null && !subTermComp.equals(termComp)) {
-          throw new IllegalStateException("sub-readers have different BytesRef.Comparators: " + subTermComp + " vs " + termComp + "; cannot merge");
-        }
-      }
-
       final BytesRef term = termsEnumIndex.termsEnum.next();
       if (term != null) {
         final TermsEnumWithSlice entry = subs[termsEnumIndex.subIndex];
@@ -149,7 +129,7 @@ public final class MultiTermsEnum extend
     numTop = 0;
 
     boolean seekOpt = false;
-    if (lastSeek != null && termComp.compare(lastSeek, term) <= 0) {
+    if (lastSeek != null && lastSeek.compareTo(term) <= 0) {
       seekOpt = true;
     }
 
@@ -167,7 +147,7 @@ public final class MultiTermsEnum extend
       if (seekOpt) {
         final BytesRef curTerm = currentSubs[i].current;
         if (curTerm != null) {
-          final int cmp = termComp.compare(term, curTerm);
+          final int cmp = term.compareTo(curTerm);
           if (cmp == 0) {
             status = true;
           } else if (cmp < 0) {
@@ -201,7 +181,7 @@ public final class MultiTermsEnum extend
     lastSeekExact = false;
 
     boolean seekOpt = false;
-    if (lastSeek != null && termComp.compare(lastSeek, term) <= 0) {
+    if (lastSeek != null && lastSeek.compareTo(term) <= 0) {
       seekOpt = true;
     }
 
@@ -219,7 +199,7 @@ public final class MultiTermsEnum extend
       if (seekOpt) {
         final BytesRef curTerm = currentSubs[i].current;
         if (curTerm != null) {
-          final int cmp = termComp.compare(term, curTerm);
+          final int cmp = term.compareTo(curTerm);
           if (cmp == 0) {
             status = SeekStatus.FOUND;
           } else if (cmp < 0) {
@@ -519,14 +499,13 @@ public final class MultiTermsEnum extend
   }
 
   private final static class TermMergeQueue extends PriorityQueue<TermsEnumWithSlice> {
-    Comparator<BytesRef> termComp;
     TermMergeQueue(int size) {
       super(size);
     }
 
     @Override
     protected boolean lessThan(TermsEnumWithSlice termsA, TermsEnumWithSlice termsB) {
-      final int cmp = termComp.compare(termsA.current, termsB.current);
+      final int cmp = termsA.current.compareTo(termsB.current);
       if (cmp != 0) {
         return cmp < 0;
       } else {

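With the pluggable comparator gone, TermMergeQueue orders sub-enums by BytesRef.compareTo alone, i.e. unsigned byte order. The k-way merge it drives can be sketched with a plain java.util.PriorityQueue over byte[] terms (Lucene's own PriorityQueue and the tie-break on sub index are elided):

    import java.nio.charset.StandardCharsets;
    import java.util.Comparator;
    import java.util.PriorityQueue;

    public class TermMergeSketch {
      // unsigned lexicographic byte order, as in BytesRef.compareTo
      static final Comparator<byte[]> BYTE_ORDER = new Comparator<byte[]>() {
        @Override
        public int compare(byte[] a, byte[] b) {
          int n = Math.min(a.length, b.length);
          for (int i = 0; i < n; i++) {
            int cmp = (a[i] & 0xff) - (b[i] & 0xff);
            if (cmp != 0) {
              return cmp;
            }
          }
          return a.length - b.length;
        }
      };

      public static void main(String[] args) {
        PriorityQueue<byte[]> queue = new PriorityQueue<byte[]>(16, BYTE_ORDER);
        for (String term : new String[] {"foo", "bar", "baz"}) {
          queue.add(term.getBytes(StandardCharsets.UTF_8));
        }
        while (!queue.isEmpty()) {  // pops bar, baz, foo
          System.out.println(new String(queue.poll(), StandardCharsets.UTF_8));
        }
      }
    }
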
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java Thu Sep 19 20:57:09 2013
@@ -22,9 +22,8 @@ import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.FieldInfosWriter;
-import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.codecs.FieldInfosWriter;
 import org.apache.lucene.codecs.StoredFieldsWriter;
 import org.apache.lucene.codecs.TermVectorsWriter;
 import org.apache.lucene.index.FieldInfo.DocValuesType;
@@ -375,19 +374,10 @@ final class SegmentMerger {
       docBase += maxDoc;
     }
 
-    final FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(segmentWriteState);
-    boolean success = false;
-    try {
-      consumer.merge(mergeState,
-                     new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
-                                     slices.toArray(ReaderSlice.EMPTY_ARRAY)));
-      success = true;
-    } finally {
-      if (success) {
-        IOUtils.close(consumer);
-      } else {
-        IOUtils.closeWhileHandlingException(consumer);
-      }
-    }
+    Fields mergedFields = new MappedMultiFields(mergeState, 
+                                                new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
+                                                                slices.toArray(ReaderSlice.EMPTY_ARRAY)));
+
+    codec.postingsFormat().fieldsConsumer(segmentWriteState).write(mergedFields);
   }
 }

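This hunk is the heart of the commit: SegmentMerger no longer calls a merge-specific FieldsConsumer.merge; it hands the postings writer a MappedMultiFields view and lets the ordinary write path consume it, so a codec implements one code path for both flushing and merging. A hedged sketch of the shape a write(Fields) implementation takes against the 4.5 APIs (real codecs do considerably more):

    import java.io.IOException;

    import org.apache.lucene.index.DocsEnum;
    import org.apache.lucene.index.Fields;
    import org.apache.lucene.index.Terms;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.util.BytesRef;

    class WriteSketch {
      void write(Fields fields) throws IOException {
        for (String field : fields) {
          Terms terms = fields.terms(field);
          if (terms == null) {
            continue;
          }
          TermsEnum termsEnum = terms.iterator(null);
          BytesRef term;
          while ((term = termsEnum.next()) != null) {
            // liveDocs must be null: MappedMultiFields has already
            // remapped doc IDs around deletions
            DocsEnum docsEnum = termsEnum.docs(null, null, DocsEnum.FLAG_FREQS);
            int doc;
            while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
              // persist the posting for (field, term, doc) here
            }
          }
        }
      }
    }
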
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java Thu Sep 19 20:57:09 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
-import java.util.Comparator;
 
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -125,11 +124,6 @@ class SortedDocValuesTermsEnum extends T
   }
 
   @Override
-  public Comparator<BytesRef> getComparator() {
-    return BytesRef.getUTF8SortedAsUnicodeComparator();
-  }
-
-  @Override
   public void seekExact(BytesRef term, TermState state) throws IOException {
     assert state != null && state instanceof OrdTermState;
     this.seekExact(((OrdTermState)state).ord);

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java Thu Sep 19 20:57:09 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
-import java.util.Comparator;
 
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -125,11 +124,6 @@ class SortedSetDocValuesTermsEnum extend
   }
 
   @Override
-  public Comparator<BytesRef> getComparator() {
-    return BytesRef.getUTF8SortedAsUnicodeComparator();
-  }
-
-  @Override
   public void seekExact(BytesRef term, TermState state) throws IOException {
     assert state != null && state instanceof OrdTermState;
     this.seekExact(((OrdTermState)state).ord);

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java Thu Sep 19 20:57:09 2013
@@ -66,12 +66,6 @@ final class TermVectorsConsumer extends 
         hasVectors = false;
       }
     }
-
-    for (final TermsHashConsumerPerField field : fieldsToFlush.values() ) {
-      TermVectorsConsumerPerField perField = (TermVectorsConsumerPerField) field;
-      perField.termsHashPerField.reset();
-      perField.shrinkHash();
-    }
   }
 
   /** Fills in no-term-vectors for all docs we haven't seen

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java Thu Sep 19 20:57:09 2013
@@ -156,7 +156,7 @@ final class TermVectorsConsumerPerField 
     TermVectorsPostingsArray postings = (TermVectorsPostingsArray) termsHashPerField.postingsArray;
     final TermVectorsWriter tv = termsWriter.writer;
 
-    final int[] termIDs = termsHashPerField.sortPostings(tv.getComparator());
+    final int[] termIDs = termsHashPerField.sortPostings();
 
     tv.startField(fieldInfo, numPostings, doVectorPositions, doVectorOffsets, hasPayloads);
     
@@ -191,11 +191,6 @@ final class TermVectorsConsumerPerField 
     fieldInfo.setStoreTermVectors();
   }
 
-  void shrinkHash() {
-    termsHashPerField.shrinkHash(maxNumPostings);
-    maxNumPostings = 0;
-  }
-
   @Override
   void start(IndexableField f) {
     if (doVectorOffsets) {

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/Terms.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/Terms.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/Terms.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/Terms.java Thu Sep 19 20:57:09 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
-import java.util.Comparator;
 
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
@@ -75,13 +74,6 @@ public abstract class Terms {
     }
   }
 
-  /** Return the BytesRef Comparator used to sort terms
-   *  provided by the iterator.  This method may return null
-   *  if there are no terms.  This method may be invoked
-   *  many times; it's best to cache a single instance &
-   *  reuse it. */
-  public abstract Comparator<BytesRef> getComparator();
-
   /** Returns the number of terms for this field, or -1 if this 
    *  measure isn't stored by the codec. Note that, just like 
    *  other term measures, this measure does not take deleted 
@@ -109,6 +101,8 @@ public abstract class Terms {
    *  measures, this measure does not take deleted documents
    *  into account. */
   public abstract int getDocCount() throws IOException;
+
+  // TODO: shouldn't we have hasFreq() as well?
   
   /** Returns true if documents in this field store offsets. */
   public abstract boolean hasOffsets();

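For code that called the now-removed getComparator, the migration is mechanical: every comparator-mediated comparison collapses to BytesRef.compareTo. A small before/after sketch (the terms variable in the comment is hypothetical):

    import org.apache.lucene.util.BytesRef;

    public class CompareMigration {
      public static void main(String[] args) {
        BytesRef a = new BytesRef("apple");
        BytesRef b = new BytesRef("banana");
        // before this commit:
        //   Comparator<BytesRef> cmp = terms.getComparator();
        //   boolean ordered = cmp.compare(a, b) < 0;
        // after: terms are always in unsigned byte order
        boolean ordered = a.compareTo(b) < 0;
        System.out.println(ordered);  // true
      }
    }
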
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java Thu Sep 19 20:57:09 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
-import java.util.Comparator;
 
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Bits;
@@ -33,8 +32,9 @@ import org.apache.lucene.util.BytesRefIt
  * #docs}.
  * 
  * <p>Term enumerations are always ordered by
- * {@link #getComparator}.  Each term in the enumeration is
- * greater than the one before it.</p>
+ * {@link BytesRef#compareTo}, which is Unicode code point
+ * order when the terms are UTF-8 bytes.  Each term in the
+ * enumeration is greater than the one before it.</p>
  *
  * <p>The TermsEnum is unpositioned when you first obtain it
  * and you must first successfully call {@link #next} or one
@@ -230,11 +230,6 @@ public abstract class TermsEnum implemen
     }
 
     @Override
-    public Comparator<BytesRef> getComparator() {
-      return null;
-    }
-      
-    @Override
     public int docFreq() {
       throw new IllegalStateException("this method should never be called");
     }

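The rewritten javadoc leans on the fact that unsigned byte order over UTF-8 equals Unicode code point order, which is not the order java.lang.String gives: String compares UTF-16 code units, so supplementary characters sort before some BMP characters. A plain-JDK demonstration:

    import java.nio.charset.StandardCharsets;

    public class Utf8OrderDemo {
      public static void main(String[] args) {
        String a = "\uFF61";        // U+FF61 HALFWIDTH IDEOGRAPHIC FULL STOP
        String b = "\uD800\uDC00";  // U+10000, a supplementary character
        // UTF-16 comparison: surrogate 0xD800 < 0xFF61, so b sorts first...
        System.out.println(a.compareTo(b) > 0);  // true
        // ...but in UTF-8 byte order (what BytesRef.compareTo uses),
        // U+FF61 < U+10000, matching Unicode code point order:
        System.out.println(compareBytes(a.getBytes(StandardCharsets.UTF_8),
                                        b.getBytes(StandardCharsets.UTF_8)) < 0);  // true
      }

      static int compareBytes(byte[] x, byte[] y) {
        int n = Math.min(x.length, y.length);
        for (int i = 0; i < n; i++) {
          int cmp = (x[i] & 0xff) - (y[i] & 0xff);
          if (cmp != 0) {
            return cmp;
          }
        }
        return x.length - y.length;
      }
    }
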
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java Thu Sep 19 20:57:09 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
-import java.util.Comparator;
 
 import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.util.ByteBlockPool;
@@ -77,13 +76,7 @@ final class TermsHashPerField extends In
       nextPerField = null;
   }
 
-  void shrinkHash(int targetSize) {
-    // Fully free the bytesHash on each flush but keep the pool untouched
-    // bytesHash.clear will clear the ByteStartArray and in turn the ParallelPostingsArray too
-    bytesHash.clear(false);
-  }
-
-  public void reset() {
+  void reset() {
     bytesHash.clear(false);
     if (nextPerField != null)
       nextPerField.reset();
@@ -107,8 +100,8 @@ final class TermsHashPerField extends In
   }
 
   /** Collapse the hash table & sort in-place. */
-  public int[] sortPostings(Comparator<BytesRef> termComp) {
-   return bytesHash.sort(termComp);
+  public int[] sortPostings() {
+    return bytesHash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
   }
 
   private boolean doCall;
@@ -136,7 +129,8 @@ final class TermsHashPerField extends In
 
   // Secondary entry point (for 2nd & subsequent TermsHash),
   // because token text has already been "interned" into
-  // textStart, so we hash by textStart
+  // textStart, so we hash by textStart.  term vectors use
+  // this API.
   public void add(int textStart) throws IOException {
     int termID = bytesHash.addByPoolOffset(textStart);
     if (termID >= 0) {      // New posting
@@ -173,7 +167,8 @@ final class TermsHashPerField extends In
     }
   }
 
-  // Primary entry point (for first TermsHash)
+  // Primary entry point (for first TermsHash); postings use
+  // this API.
   @Override
   void add() throws IOException {
 

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java Thu Sep 19 20:57:09 2013
@@ -101,7 +101,7 @@ class ConstantScoreAutoRewrite extends T
     } else {
       final BooleanQuery bq = getTopLevelQuery();
       final BytesRefHash pendingTerms = col.pendingTerms;
-      final int sort[] = pendingTerms.sort(col.termsEnum.getComparator());
+      final int sort[] = pendingTerms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
       for(int i = 0; i < size; i++) {
         final int pos = sort[i];
         // docFreq is not used for constant score here, we pass 1

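pendingTerms here is a BytesRefHash, which stores terms unsorted and only imposes an order when asked. A minimal sketch of the add/sort/get cycle, assuming the 4.x BytesRefHash API:

    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.BytesRefHash;

    public class PendingTermsSketch {
      public static void main(String[] args) {
        BytesRefHash pendingTerms = new BytesRefHash();
        pendingTerms.add(new BytesRef("zebra"));
        pendingTerms.add(new BytesRef("apple"));
        pendingTerms.add(new BytesRef("mango"));

        final int size = pendingTerms.size();  // capture before the destructive sort
        final int sort[] = pendingTerms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
        BytesRef scratch = new BytesRef();
        for (int i = 0; i < size; i++) {
          // prints apple, mango, zebra
          System.out.println(pendingTerms.get(sort[i], scratch).utf8ToString());
        }
      }
    }
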
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/DocTermOrdsRewriteMethod.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/DocTermOrdsRewriteMethod.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/DocTermOrdsRewriteMethod.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/DocTermOrdsRewriteMethod.java Thu Sep 19 20:57:09 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.search;
  */
 
 import java.io.IOException;
-import java.util.Comparator;
 
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader;
@@ -26,7 +25,6 @@ import org.apache.lucene.index.SortedSet
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.OpenBitSet;
 
 /**
@@ -91,11 +89,6 @@ public final class DocTermOrdsRewriteMet
       TermsEnum termsEnum = query.getTermsEnum(new Terms() {
         
         @Override
-        public Comparator<BytesRef> getComparator() {
-          return BytesRef.getUTF8SortedAsUnicodeComparator();
-        }
-        
-        @Override
         public TermsEnum iterator(TermsEnum reuse) {
           return docTermOrds.termsEnum();
         }

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java Thu Sep 19 20:57:09 2013
@@ -18,7 +18,6 @@ package org.apache.lucene.search;
  */
 
 import java.io.IOException;
-import java.util.Comparator;
 
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader;
@@ -26,7 +25,6 @@ import org.apache.lucene.index.SortedDoc
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.OpenBitSet;
 
 /**
@@ -91,11 +89,6 @@ public final class FieldCacheRewriteMeth
       TermsEnum termsEnum = query.getTermsEnum(new Terms() {
         
         @Override
-        public Comparator<BytesRef> getComparator() {
-          return BytesRef.getUTF8SortedAsUnicodeComparator();
-        }
-        
-        @Override
         public TermsEnum iterator(TermsEnum reuse) {
           return fcsi.termsEnum();
         }

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java?rev=1524840&r1=1524839&r2=1524840&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java Thu Sep 19 20:57:09 2013
@@ -46,7 +46,7 @@ import org.apache.lucene.util.automaton.
  * to the specified filter term.
  *
  * <p>Term enumerations are always ordered by
- * {@link #getComparator}.  Each term in the enumeration is
+ * {@link BytesRef#compareTo}.  Each term in the enumeration is
  * greater than all that precede it.</p>
  */
 public class FuzzyTermsEnum extends TermsEnum {
@@ -293,11 +293,6 @@ public class FuzzyTermsEnum extends Term
   }
   
   @Override
-  public Comparator<BytesRef> getComparator() {
-    return actualEnum.getComparator();
-  }
-  
-  @Override
   public long ord() throws IOException {
     return actualEnum.ord();
   }