You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by bu...@apache.org on 2011/01/16 03:25:26 UTC

svn commit: r1059434 [1/2] - in /lucene/dev/branches/realtime_search: ./ dev-tools/idea/.idea/copyright/ lucene/ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/ lucene/contrib/instantiated/src/test/org/apache/lucene/store/ins...

Author: buschmi
Date: Sun Jan 16 02:25:24 2011
New Revision: 1059434

URL: http://svn.apache.org/viewvc?rev=1059434&view=rev
Log:
Merging r1058717 through r1059431 into realtime branch

Added:
    lucene/dev/branches/realtime_search/dev-tools/idea/.idea/copyright/
      - copied from r1059431, lucene/dev/trunk/dev-tools/idea/.idea/copyright/
    lucene/dev/branches/realtime_search/dev-tools/idea/.idea/copyright/Apache_Software_Foundation.xml
      - copied unchanged from r1059431, lucene/dev/trunk/dev-tools/idea/.idea/copyright/Apache_Software_Foundation.xml
    lucene/dev/branches/realtime_search/dev-tools/idea/.idea/copyright/profiles_settings.xml
      - copied unchanged from r1059431, lucene/dev/trunk/dev-tools/idea/.idea/copyright/profiles_settings.xml
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/TermStats.java
      - copied unchanged from r1059431, lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/TermStats.java
    lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java
      - copied unchanged from r1059431, lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestMaxTermFrequency.java
Modified:
    lucene/dev/branches/realtime_search/   (props changed)
    lucene/dev/branches/realtime_search/lucene/   (props changed)
    lucene/dev/branches/realtime_search/lucene/CHANGES.txt
    lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java
    lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
    lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
    lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTerm.java
    lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java
    lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
    lucene/dev/branches/realtime_search/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
    lucene/dev/branches/realtime_search/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java
    lucene/dev/branches/realtime_search/lucene/contrib/misc/src/java/org/apache/lucene/misc/TermStats.java
    lucene/dev/branches/realtime_search/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java
    lucene/dev/branches/realtime_search/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/CheckIndex.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldInvertState.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexReader.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiTerms.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/Terms.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermsEnum.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PostingsConsumer.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PostingsWriterBase.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermState.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/TermQuery.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/function/CustomScoreQuery.java
    lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/function/ValueSourceQuery.java
    lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
    lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/index/TestCodecs.java
    lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
    lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java
    lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java
    lucene/dev/branches/realtime_search/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java
    lucene/dev/branches/realtime_search/modules/   (props changed)
    lucene/dev/branches/realtime_search/solr/   (props changed)
    lucene/dev/branches/realtime_search/solr/CHANGES.txt   (props changed)
    lucene/dev/branches/realtime_search/solr/KEYS   (props changed)
    lucene/dev/branches/realtime_search/solr/LICENSE.txt   (props changed)
    lucene/dev/branches/realtime_search/solr/NOTICE.txt   (props changed)
    lucene/dev/branches/realtime_search/solr/README.txt   (props changed)
    lucene/dev/branches/realtime_search/solr/build.xml   (props changed)
    lucene/dev/branches/realtime_search/solr/client/   (props changed)
    lucene/dev/branches/realtime_search/solr/common-build.xml   (props changed)
    lucene/dev/branches/realtime_search/solr/contrib/   (props changed)
    lucene/dev/branches/realtime_search/solr/contrib/clustering/CHANGES.txt
    lucene/dev/branches/realtime_search/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
    lucene/dev/branches/realtime_search/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/LuceneLanguageModelFactory.java
    lucene/dev/branches/realtime_search/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/DistributedClusteringComponentTest.java
    lucene/dev/branches/realtime_search/solr/example/   (props changed)
    lucene/dev/branches/realtime_search/solr/lib/   (props changed)
    lucene/dev/branches/realtime_search/solr/site/   (props changed)
    lucene/dev/branches/realtime_search/solr/src/   (props changed)
    lucene/dev/branches/realtime_search/solr/src/java/org/apache/solr/request/UnInvertedField.java
    lucene/dev/branches/realtime_search/solr/testlogging.properties   (props changed)

Modified: lucene/dev/branches/realtime_search/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/CHANGES.txt?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/realtime_search/lucene/CHANGES.txt Sun Jan 16 02:25:24 2011
@@ -359,6 +359,9 @@ New features
   terms dict.  This impl stores the indexed terms in an FST, which is
   much more RAM efficient than FixedGapTermsIndex.  (Mike McCandless)
 
+* LUCENE-2862: Added TermsEnum.totalTermFreq() and
+  Terms.getSumTotalTermFreq().  (Mike McCandless, Robert Muir)
+
 Optimizations
 
 * LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching.
@@ -737,6 +740,10 @@ New features
   is no longer needed and discouraged for that use case. Directly wrapping
   Query improves performance, as out-of-order collection is now supported.
   (Uwe Schindler)
+
+* LUCENE-2864: Add getMaxTermFrequency (maximum within-document TF) to 
+  FieldInvertState so that it can be used in Similarity.computeNorm.
+  (Robert Muir)
   
 Optimizations
 

Modified: lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java Sun Jan 16 02:25:24 2011
@@ -238,6 +238,10 @@ public class InstantiatedIndex
           while((text = termsEnum.next()) != null) {
             String termText = text.utf8ToString();
             InstantiatedTerm instantiatedTerm = new InstantiatedTerm(field, termText);
+            final long totalTermFreq = termsEnum.totalTermFreq();
+            if (totalTermFreq != -1) {
+              instantiatedTerm.addPositionsCount(totalTermFreq);
+            }
             getTermsByFieldAndText().get(field).put(termText, instantiatedTerm);
             instantiatedTerm.setTermIndex(terms.size());
             terms.add(instantiatedTerm);

Modified: lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java Sun Jan 16 02:25:24 2011
@@ -398,12 +398,22 @@ public class InstantiatedIndexReader ext
         if (i < 0) {
           i = -i - 1;
         }
-        if (i >= orderedTerms.length || !orderedTerms[i].field().equals(field)) {
+        if (i >= orderedTerms.length || orderedTerms[i].field() != field) {
           // field does not exist
           return null;
         }
         final int startLoc = i;
 
+        // TODO: heavy to do this here; would be better to
+        // do it up front & cache
+        long sum = 0;
+        int upto = i;  // cursor over the consecutive terms that belong to this field
+        while(upto < orderedTerms.length && orderedTerms[upto].field() == field) {  // index by upto, not i, so the scan stops at the field boundary
+          sum += orderedTerms[upto].getTotalTermFreq();  // add each term's own total frequency
+          upto++;
+        }
+        final long sumTotalTermFreq = sum;
+
         return new Terms() {
           @Override 
           public TermsEnum iterator() {
@@ -411,6 +421,11 @@ public class InstantiatedIndexReader ext
           }
 
           @Override
+          public long getSumTotalTermFreq() {
+            return sumTotalTermFreq;
+          }
+
+          @Override
           public Comparator<BytesRef> getComparator() {
             return BytesRef.getUTF8SortedAsUnicodeComparator();
           }

Modified: lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java Sun Jan 16 02:25:24 2011
@@ -315,6 +315,7 @@ public class InstantiatedIndexWriter imp
           }
           associatedDocuments[associatedDocuments.length - 1] = info;          
           term.setAssociatedDocuments(associatedDocuments);
+          term.addPositionsCount(positions.length);
 
           // todo optimize, only if term vector?
           informationByTermOfCurrentDocument.put(term, info);

Modified: lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTerm.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTerm.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTerm.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTerm.java Sun Jan 16 02:25:24 2011
@@ -45,6 +45,8 @@ public class InstantiatedTerm
   
   private Term term;
 
+  private long totalTermFreq;
+
   /**
    * index of term in InstantiatedIndex
    * @see org.apache.lucene.store.instantiated.InstantiatedIndex#getOrderedTerms() */
@@ -92,6 +94,14 @@ public class InstantiatedTerm
     this.associatedDocuments = associatedDocuments;
   }
 
+  void addPositionsCount(long count) {
+    totalTermFreq += count;
+  }
+
+  public long getTotalTermFreq() {
+    return totalTermFreq;
+  }
+
   /**
    * Finds index to the first beyond the current whose document number is
    * greater than or equal to <i>target</i>, -1 if there is no such element.

Modified: lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java Sun Jan 16 02:25:24 2011
@@ -24,7 +24,6 @@ import org.apache.lucene.index.TermState
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.codecs.PrefixCodedTermState;
 
 import java.io.IOException;
 import java.util.Arrays;
@@ -111,6 +110,12 @@ public class InstantiatedTermsEnum exten
   }
 
   @Override
+  public long totalTermFreq() {
+    final long v = terms[upto].getTotalTermFreq();
+    return v == 0 ? -1 : v;
+  }
+
+  @Override
   public DocsEnum docs(Bits skipDocs, DocsEnum reuse) {
     if (reuse == null || !(reuse instanceof InstantiatedDocsEnum)) {
       reuse = new InstantiatedDocsEnum();

Modified: lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java Sun Jan 16 02:25:24 2011
@@ -66,6 +66,7 @@ public class TestIndicesEquals extends L
     // create dir data
     IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(
         TEST_VERSION_CURRENT, new MockAnalyzer()));
+    
     for (int i = 0; i < 20; i++) {
       Document document = new Document();
       assembleDocument(document, i);
@@ -395,6 +396,10 @@ public class TestIndicesEquals extends L
         }
 
         assertTrue(aprioriTermEnum.docFreq() == testTermEnum.docFreq());
+        final long totalTermFreq = aprioriTermEnum.totalTermFreq();
+        if (totalTermFreq != -1) {
+          assertEquals(totalTermFreq, testTermEnum.totalTermFreq());
+        }
 
         // compare termDocs seeking
 

Modified: lucene/dev/branches/realtime_search/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Sun Jan 16 02:25:24 2011
@@ -610,6 +610,8 @@ public class MemoryIndex implements Seri
     /** Term for this field's fieldName, lazily computed on demand */
     public transient Term template;
 
+    private final long sumTotalTermFreq;
+
     private static final long serialVersionUID = 2882195016849084649L;  
 
     public Info(HashMap<BytesRef,ArrayIntList> terms, int numTokens, int numOverlapTokens, float boost) {
@@ -617,6 +619,15 @@ public class MemoryIndex implements Seri
       this.numTokens = numTokens;
       this.numOverlapTokens = numOverlapTokens;
       this.boost = boost;
+      long sum = 0;
+      for(Map.Entry<BytesRef,ArrayIntList> ent : terms.entrySet()) {
+        sum += ent.getValue().size();
+      }
+      sumTotalTermFreq = sum;
+    }
+
+    public long getSumTotalTermFreq() {
+      return sumTotalTermFreq;
     }
     
     /**
@@ -826,6 +837,11 @@ public class MemoryIndex implements Seri
               public long getUniqueTermCount() {
                 return info.sortedTerms.length;
               }
+
+              @Override
+              public long getSumTotalTermFreq() {
+                return info.getSumTotalTermFreq();
+              }
             };
           }
         }
@@ -896,6 +912,11 @@ public class MemoryIndex implements Seri
       }
 
       @Override
+      public long totalTermFreq() {
+        return info.sortedTerms[termUpto].getValue().size();
+      }
+
+      @Override
       public DocsEnum docs(Bits skipDocs, DocsEnum reuse) {
         if (reuse == null || !(reuse instanceof MemoryDocsEnum)) {
           reuse = new MemoryDocsEnum();

Modified: lucene/dev/branches/realtime_search/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java Sun Jan 16 02:25:24 2011
@@ -176,15 +176,34 @@ public class HighFreqTerms {
     return ts;
   }
   
-  public static long getTotalTermFreq(IndexReader reader, String field, BytesRef termtext) throws Exception {
-    BytesRef br = termtext;
+  public static long getTotalTermFreq(IndexReader reader, String field, BytesRef termText) throws Exception {
+
     long totalTF = 0;
-    Bits skipDocs = MultiFields.getDeletedDocs(reader);
-    DocsEnum de = MultiFields.getTermDocsEnum(reader, skipDocs, field, br);
-    // if term is not in index return totalTF of 0
-    if (de == null) {
+    
+    Terms terms = MultiFields.getTerms(reader, field);
+    if (terms == null) {
+      return 0;
+    }
+
+    TermsEnum termsEnum = terms.iterator();
+    if (termsEnum.seek(termText) != TermsEnum.SeekStatus.FOUND) {
       return 0;
     }
+
+    Bits skipDocs = MultiFields.getDeletedDocs(reader);
+    if (skipDocs == null) {
+      // TODO: we could do this up front, during the scan
+      // (next()), instead of after-the-fact here w/ seek,
+      // if the codec supports it and there are no del
+      // docs...
+      final long totTF = termsEnum.totalTermFreq();
+      if (totTF != -1) {
+        return totTF;
+      }
+    }
+    
+    DocsEnum de = termsEnum.docs(skipDocs, null);
+
     // use DocsEnum.read() and BulkResult api
     final DocsEnum.BulkReadResult bulkresult = de.getBulkResult();
     int count;

Modified: lucene/dev/branches/realtime_search/lucene/contrib/misc/src/java/org/apache/lucene/misc/TermStats.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/misc/src/java/org/apache/lucene/misc/TermStats.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/misc/src/java/org/apache/lucene/misc/TermStats.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/misc/src/java/org/apache/lucene/misc/TermStats.java Sun Jan 16 02:25:24 2011
@@ -41,4 +41,9 @@ public final class TermStats {
   String getTermText() {
     return termtext.utf8ToString();
   }
+
+  @Override
+  public String toString() {
+    return("TermStats: term=" + termtext.utf8ToString() + " docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq);
+  }
 }
\ No newline at end of file

Modified: lucene/dev/branches/realtime_search/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java Sun Jan 16 02:25:24 2011
@@ -17,15 +17,16 @@ package org.apache.lucene.misc;
  * limitations under the License.
  */
 
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.store.Directory;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 
@@ -41,8 +42,10 @@ public class TestHighFreqTerms extends L
     writer = new IndexWriter(dir, newIndexWriterConfig(random,
        TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))
        .setMaxBufferedDocs(2));
+    writer.setInfoStream(VERBOSE ? System.out : null);
     indexDocs(writer);
     reader = IndexReader.open(dir, true);
+    _TestUtil.checkIndex(dir);
   }
   
   @AfterClass
@@ -75,8 +78,8 @@ public class TestHighFreqTerms extends L
     String field="FIELD_1";
     TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field);
     for (int i = 0; i < terms.length; i++) {
-      if (i >0){
-       assertTrue ("out of order " + terms[i-1].docFreq + "should be >= " + terms[i].docFreq,terms[i-1].docFreq >= terms[i].docFreq);
+      if (i > 0) {
+        assertTrue ("out of order " + terms[i-1].docFreq + "should be >= " + terms[i].docFreq,terms[i-1].docFreq >= terms[i].docFreq);
       }
     }    
   }
@@ -134,11 +137,12 @@ public class TestHighFreqTerms extends L
     TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field);
     TermStats[] termsWithTF = HighFreqTerms.sortByTotalTermFreq(reader, terms);
  
-  for (int i = 0; i < termsWithTF.length; i++) {
-    // check that they are sorted by descending termfreq order
-    if (i >0){
-      assertTrue ("out of order" +termsWithTF[i-1]+ " > " +termsWithTF[i],termsWithTF[i-1].totalTermFreq > termsWithTF[i].totalTermFreq);
-     }
+    for (int i = 0; i < termsWithTF.length; i++) {
+      // check that they are sorted by descending termfreq
+      // order
+      if (i > 0) {
+        assertTrue ("out of order" +termsWithTF[i-1]+ " > " +termsWithTF[i],termsWithTF[i-1].totalTermFreq >= termsWithTF[i].totalTermFreq);
+      }
     } 
   }
   

Modified: lucene/dev/branches/realtime_search/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java (original)
+++ lucene/dev/branches/realtime_search/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java Sun Jan 16 02:25:24 2011
@@ -123,7 +123,11 @@ public final class FieldCacheRewriteMeth
         public TermsEnum iterator() throws IOException {
           return fcsi.getTermsEnum();
         }
-        
+
+        @Override
+        public long getSumTotalTermFreq() {
+          return -1;
+        }
       });
       
       assert termsEnum != null;

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/CheckIndex.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/CheckIndex.java Sun Jan 16 02:25:24 2011
@@ -610,6 +610,8 @@ public class CheckIndex {
 
         Comparator<BytesRef> termComp = terms.getComparator();
 
+        long sumTotalTermFreq = 0;
+
         while(true) {
 
           final BytesRef term = terms.next();
@@ -660,6 +662,8 @@ public class CheckIndex {
           }
 
           int lastDoc = -1;
+          int docCount = 0;
+          long totalTermFreq = 0;
           while(true) {
             final int doc = docs2.nextDoc();
             if (doc == DocIdSetIterator.NO_MORE_DOCS) {
@@ -667,6 +671,8 @@ public class CheckIndex {
             }
             final int freq = docs2.freq();
             status.totPos += freq;
+            totalTermFreq += freq;
+            docCount++;
 
             if (doc <= lastDoc) {
               throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
@@ -697,22 +703,39 @@ public class CheckIndex {
               }
             }
           }
+          
+          final long totalTermFreq2 = terms.totalTermFreq();
+          final boolean hasTotalTermFreq = postings != null && totalTermFreq2 != -1;
 
-          // Now count how many deleted docs occurred in
-          // this term:
-
+          // Re-count if there are deleted docs:
           if (reader.hasDeletions()) {
             final DocsEnum docsNoDel = terms.docs(null, docs);
-            int count = 0;
+            docCount = 0;
+            totalTermFreq = 0;
             while(docsNoDel.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
-              count++;
+              docCount++;
+              totalTermFreq += docsNoDel.freq();
             }
-            if (count != docFreq) {
-              throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != tot docs w/o deletions " + count);
+          }
+
+          if (docCount != docFreq) {
+            throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != tot docs w/o deletions " + docCount);
+          }
+          if (hasTotalTermFreq) {
+            sumTotalTermFreq += totalTermFreq;
+            if (totalTermFreq != totalTermFreq2) {
+              throw new RuntimeException("term " + term + " totalTermFreq=" + totalTermFreq2 + " != recomputed totalTermFreq=" + totalTermFreq);
             }
           }
         }
 
+        if (sumTotalTermFreq != 0) {
+          final long v = fields.terms(field).getSumTotalTermFreq();
+          if (v != -1 && sumTotalTermFreq != v) {
+            throw new RuntimeException("sumTotalTermFreq for field " + field + "=" + v + " != recomputed sumTotalTermFreq=" + sumTotalTermFreq);
+          }
+        }
+
         // Test seek to last term:
         if (lastTerm != null) {
           if (terms.seek(lastTerm) != TermsEnum.SeekStatus.FOUND) {

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldInvertState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldInvertState.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldInvertState.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldInvertState.java Sun Jan 16 02:25:24 2011
@@ -30,6 +30,7 @@ public final class FieldInvertState {
   int length;
   int numOverlap;
   int offset;
+  int maxTermFrequency;
   float boost;
   AttributeSource attributeSource;
 
@@ -53,6 +54,7 @@ public final class FieldInvertState {
     length = 0;
     numOverlap = 0;
     offset = 0;
+    maxTermFrequency = 0;
     boost = docBoost;
     attributeSource = null;
   }
@@ -110,6 +112,15 @@ public final class FieldInvertState {
   public void setBoost(float boost) {
     this.boost = boost;
   }
+
+  /**
+   * Get the maximum term-frequency encountered for any term in the field.  A
+   * field containing "the quick brown fox jumps over the lazy dog" would have
+   * a value of 2, because "the" appears twice.
+   */
+  public int getMaxTermFrequency() {
+    return maxTermFrequency;
+  }
   
   public AttributeSource getAttributeSource() {
     return attributeSource;

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Sun Jan 16 02:25:24 2011
@@ -99,6 +99,11 @@ public class FilterIndexReader extends I
     public long getUniqueTermCount() throws IOException {
       return in.getUniqueTermCount();
     }
+
+    @Override
+    public long getSumTotalTermFreq() throws IOException {
+      return in.getSumTotalTermFreq();
+    }
   }
 
   /** Base class for filtering {@link TermsEnum} implementations. */
@@ -156,6 +161,11 @@ public class FilterIndexReader extends I
     }
 
     @Override
+    public long totalTermFreq() {
+      return in.totalTermFreq();
+    }
+
+    @Override
     public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
       return in.docs(skipDocs, reuse);
     }

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java Sun Jan 16 02:25:24 2011
@@ -24,6 +24,7 @@ import org.apache.lucene.analysis.tokena
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.codecs.FieldsConsumer;
 import org.apache.lucene.index.codecs.PostingsConsumer;
+import org.apache.lucene.index.codecs.TermStats;
 import org.apache.lucene.index.codecs.TermsConsumer;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
@@ -130,6 +131,7 @@ final class FreqProxTermsWriterPerField 
       postings.docFreqs[termID] = 1;
       writeProx(termID, fieldState.position);
     }
+    fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
   }
 
   @Override
@@ -163,11 +165,12 @@ final class FreqProxTermsWriterPerField 
           termsHashPerField.writeVInt(0, postings.docFreqs[termID]);
         }
         postings.docFreqs[termID] = 1;
+        fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
         postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
         postings.lastDocIDs[termID] = docState.docID;
         writeProx(termID, fieldState.position);
       } else {
-        postings.docFreqs[termID]++;
+        fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
         writeProx(termID, fieldState.position-postings.lastPositions[termID]);
       }
     }
@@ -237,7 +240,7 @@ final class FreqProxTermsWriterPerField 
     final ByteSliceReader freq = new ByteSliceReader();
     final ByteSliceReader prox = new ByteSliceReader();
 
-
+    long sumTotalTermFreq = 0;
     for (int i = 0; i < numTerms; i++) {
       final int termID = termIDs[i];
       // Get BytesRef
@@ -261,6 +264,7 @@ final class FreqProxTermsWriterPerField 
       // which all share the same term.  Now we must
       // interleave the docID streams.
       int numDocs = 0;
+      long totTF = 0;
       int docID = 0;
       int termFreq = 0;
 
@@ -305,6 +309,7 @@ final class FreqProxTermsWriterPerField 
           // omitTermFreqAndPositions == false so we do write positions &
           // payload
           int position = 0;
+          totTF += termDocFreq;
           for(int j=0;j<termDocFreq;j++) {
             final int code = prox.readVInt();
             position += code >> 1;
@@ -338,10 +343,11 @@ final class FreqProxTermsWriterPerField 
           postingsConsumer.finishDoc();
         }
       }
-      termsConsumer.finishTerm(text, numDocs);
+      termsConsumer.finishTerm(text, new TermStats(numDocs, totTF));
+      sumTotalTermFreq += totTF;
     }
 
-    termsConsumer.finish();
+    termsConsumer.finish(sumTotalTermFreq);
   }
 
 }

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexReader.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexReader.java Sun Jan 16 02:25:24 2011
@@ -997,6 +997,23 @@ public abstract class IndexReader implem
     return terms.docFreq(term);
   }
 
+  /** Returns the total number of occurrences of <code>term</code>
+   * in <code>field</code> across all documents.  This method returns
+   * 0 if the term or field does not exist, and may return -1 if the
+   * codec does not store this measure.  This method does not take
+   * into account deleted documents that have not yet been merged away. */
+  public long totalTermFreq(String field, BytesRef term) throws IOException {
+    final Fields fields = fields();
+    if (fields == null) {
+      return 0;
+    }
+    final Terms terms = fields.terms(field);
+    if (terms == null) {
+      return 0;
+    }
+    return terms.totalTermFreq(term);
+  }
+
   /** This may return null if the field does not exist.*/
   public Terms terms(String field) throws IOException {
     final Fields fields = fields();

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiTerms.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiTerms.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiTerms.java Sun Jan 16 02:25:24 2011
@@ -77,6 +77,19 @@ public final class MultiTerms extends Te
   }
 
   @Override
+  public long getSumTotalTermFreq() throws IOException {
+    long sum = 0;
+    for(Terms terms : subs) {
+      final long v = terms.getSumTotalTermFreq();
+      if (v == -1) {
+        return -1;
+      }
+      sum += v;
+    }
+    return sum;
+  }
+
+  @Override
   public Comparator<BytesRef> getComparator() {
     return termComp;
   }

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java Sun Jan 16 02:25:24 2011
@@ -266,6 +266,19 @@ public final class MultiTermsEnum extend
   }
 
   @Override
+  public long totalTermFreq() {
+    long sum = 0;
+    for(int i=0;i<numTop;i++) {
+      final long v = top[i].terms.totalTermFreq();
+      if (v == -1) {
+        return v;
+      }
+      sum += v;
+    }
+    return sum;
+  }
+
+  @Override
   public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
     final MultiDocsEnum docsEnum;
     if (reuse != null) {

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/Terms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/Terms.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/Terms.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/Terms.java Sun Jan 16 02:25:24 2011
@@ -57,6 +57,18 @@ public abstract class Terms {
     }
   }
 
+  /** Returns the total number of occurrences of the
+   *  specified term text across all documents (-1 if the codec
+   *  doesn't store it).  Returns 0 if the term does not exist. */
+  public long totalTermFreq(BytesRef text) throws IOException {
+    final TermsEnum termsEnum = getThreadTermsEnum();
+    if (termsEnum.seek(text) == TermsEnum.SeekStatus.FOUND) {
+      return termsEnum.totalTermFreq();
+    } else {
+      return 0;
+    }
+  }
+
   /** Get {@link DocsEnum} for the specified term.  This
    *  method may return null if the term does not exist. */
   public DocsEnum docs(Bits skipDocs, BytesRef text, DocsEnum reuse) throws IOException {
@@ -115,6 +127,14 @@ public abstract class Terms {
     throw new UnsupportedOperationException("this reader does not implement getUniqueTermCount()");
   }
 
+  /** Returns the sum of {@link TermsEnum#totalTermFreq} for
+   *  all terms in this field, or -1 if this measure isn't
+   *  stored by the codec (or if this field omits term freq
+   *  and positions).  Note that, just like other term
+   *  measures, this measure does not take deleted documents
+   *  into account. */
+  public abstract long getSumTotalTermFreq() throws IOException;
+
   /**
    * Returns a thread-private {@link TermsEnum} instance. Obtaining
    * {@link TermsEnum} from this method might be more efficient than using

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermsEnum.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermsEnum.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermsEnum.java Sun Jan 16 02:25:24 2011
@@ -125,7 +125,15 @@ public abstract class TermsEnum {
    *  first time, after next() returns null or seek returns
    *  {@link SeekStatus#END}.*/
   public abstract int docFreq();
-  
+
+  /** Returns the total number of occurrences of this term
+   *  across all documents (the sum of the freq() for each
+   *  doc that has this term).  This will be -1 if the
+   *  codec doesn't support this measure.  Note that, like
+   *  other term measures, this measure does not take
+   *  deleted documents into account. */
+  public abstract long totalTermFreq();
+
   /** Get {@link DocsEnum} for the current term.  Do not
    *  call this before calling {@link #next} or {@link
    *  #seek} for the first time.  This method will not
@@ -197,6 +205,11 @@ public abstract class TermsEnum {
     public int docFreq() {
       throw new IllegalStateException("this method should never be called");
     }
+
+    @Override
+    public long totalTermFreq() {
+      throw new IllegalStateException("this method should never be called");
+    }
       
     @Override
     public long ord() {

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java Sun Jan 16 02:25:24 2011
@@ -132,7 +132,6 @@ public class FixedGapTermsIndexReader ex
   private class IndexEnum extends FieldIndexEnum {
     private final FieldIndexData.CoreFieldIndex fieldIndex;
     private final BytesRef term = new BytesRef();
-    private final BytesRef nextTerm = new BytesRef();
     private long ord;
 
     public IndexEnum(FieldIndexData.CoreFieldIndex fieldIndex) {
@@ -192,7 +191,7 @@ public class FixedGapTermsIndexReader ex
 
       final long offset = fieldIndex.termOffsets.get(idx);
       final int length = (int) (fieldIndex.termOffsets.get(1+idx) - offset);
-      termBytesReader.fillSlice(nextTerm, fieldIndex.termBytesStart + offset, length);
+      termBytesReader.fillSlice(term, fieldIndex.termBytesStart + offset, length);
       return fieldIndex.termsStart + fieldIndex.termsDictOffsets.get(idx);
     }
 

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java Sun Jan 16 02:25:24 2011
@@ -128,7 +128,7 @@ public class FixedGapTermsIndexWriter ex
     }
 
     @Override
-    public boolean checkIndexTerm(BytesRef text, int docFreq) throws IOException {
+    public boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException {
       // First term is first indexed term:
       if (0 == (numTerms++ % termIndexInterval)) {
 

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PostingsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PostingsConsumer.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PostingsConsumer.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PostingsConsumer.java Sun Jan 16 02:25:24 2011
@@ -55,9 +55,10 @@ public abstract class PostingsConsumer {
 
   /** Default merge impl: append documents, mapping around
    *  deletes */
-  public int merge(final MergeState mergeState, final DocsEnum postings) throws IOException {
+  public TermStats merge(final MergeState mergeState, final DocsEnum postings) throws IOException {
 
     int df = 0;
+    long totTF = 0;
 
     if (mergeState.fieldInfo.omitTermFreqAndPositions) {
       while(true) {
@@ -68,6 +69,7 @@ public abstract class PostingsConsumer {
         this.startDoc(doc, postings.freq());
         this.finishDoc();
         df++;
+        totTF++;
       }
     } else {
       final DocsAndPositionsEnum postingsEnum = (DocsAndPositionsEnum) postings;
@@ -78,6 +80,7 @@ public abstract class PostingsConsumer {
         }
         final int freq = postingsEnum.freq();
         this.startDoc(doc, freq);
+        totTF += freq;
         for(int i=0;i<freq;i++) {
           final int position = postingsEnum.nextPosition();
           final BytesRef payload;
@@ -92,6 +95,6 @@ public abstract class PostingsConsumer {
         df++;
       }
     }
-    return df;
+    return new TermStats(df, totTF);
   }
 }

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PostingsWriterBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PostingsWriterBase.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PostingsWriterBase.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PostingsWriterBase.java Sun Jan 16 02:25:24 2011
@@ -34,7 +34,7 @@ public abstract class PostingsWriterBase
   public abstract void startTerm() throws IOException;
 
   /** Finishes the current term */
-  public abstract void finishTerm(int numDocs, boolean isIndexTerm) throws IOException;
+  public abstract void finishTerm(TermStats stats, boolean isIndexTerm) throws IOException;
 
   public abstract void setField(FieldInfo fieldInfo);
 

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermState.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermState.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermState.java Sun Jan 16 02:25:24 2011
@@ -27,7 +27,8 @@ import org.apache.lucene.index.TermState
 public class PrefixCodedTermState extends OrdTermState {
   public int docFreq; // how many docs have this term
   public long filePointer; // fp into the terms dict primary file (_X.tis)
-
+  public long totalTermFreq;                           // total number of occurrences of this term
+  
   @Override
   public void copyFrom(TermState _other) {
     assert _other instanceof PrefixCodedTermState : "can not copy from " + _other.getClass().getName();
@@ -35,11 +36,12 @@ public class PrefixCodedTermState extend
     super.copyFrom(_other);
     filePointer = other.filePointer;
     docFreq = other.docFreq;
+    totalTermFreq = other.totalTermFreq;
   }
 
   @Override
   public String toString() {
-    return super.toString() + "[ord=" + ord + ", tis.filePointer=" + filePointer + "]";
+    return super.toString() + "[ord=" + ord + ", tis.filePointer=" + filePointer + ", docFreq=" + docFreq + ", totalTermFreq=" + totalTermFreq + "]";
   }
   
 }

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java Sun Jan 16 02:25:24 2011
@@ -129,18 +129,17 @@ public class PrefixCodedTermsReader exte
       // Read per-field details
       seekDir(in, dirOffset);
 
-      final int numFields = in.readInt();
+      final int numFields = in.readVInt();
 
       for(int i=0;i<numFields;i++) {
-        final int field = in.readInt();
-        final long numTerms = in.readLong();
+        final int field = in.readVInt();
+        final long numTerms = in.readVLong();
         assert numTerms >= 0;
-        final long termsStartPointer = in.readLong();
+        final long termsStartPointer = in.readVLong();
         final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
-        if (numTerms > 0) {
-          assert !fields.containsKey(fieldInfo.name);
-          fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer));
-        }
+        final long sumTotalTermFreq = fieldInfo.omitTermFreqAndPositions ? -1 : in.readVLong();
+        assert !fields.containsKey(fieldInfo.name);
+        fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq));
       }
       success = true;
     } finally {
@@ -245,12 +244,14 @@ public class PrefixCodedTermsReader exte
     final long numTerms;
     final FieldInfo fieldInfo;
     final long termsStartPointer;
+    final long sumTotalTermFreq;
 
-    FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer) {
+    FieldReader(FieldInfo fieldInfo, long numTerms, long termsStartPointer, long sumTotalTermFreq) {
       assert numTerms > 0;
       this.fieldInfo = fieldInfo;
       this.numTerms = numTerms;
       this.termsStartPointer = termsStartPointer;
+      this.sumTotalTermFreq = sumTotalTermFreq;
     }
 
     @Override
@@ -273,6 +274,11 @@ public class PrefixCodedTermsReader exte
       return numTerms;
     }
 
+    @Override
+    public long getSumTotalTermFreq() {
+      return sumTotalTermFreq;
+    }
+
     // Iterates through terms in this field, not supporting ord()
     private final class SegmentTermsEnum extends TermsEnum {
       private final IndexInput in;
@@ -295,6 +301,7 @@ public class PrefixCodedTermsReader exte
         bytesReader = new DeltaBytesReader(in);
         fieldTerm.field = fieldInfo.name;
         state = postingsReader.newTermState();
+        state.totalTermFreq = -1;
         state.ord = -1;
       }
 
@@ -494,6 +501,10 @@ public class PrefixCodedTermsReader exte
           state.docFreq = (in.readVInt() << 6) | (b & 0x3F);
         }
 
+        if (!fieldInfo.omitTermFreqAndPositions) {
+          state.totalTermFreq = state.docFreq + in.readVLong();
+        }
+
         postingsReader.readTerm(in,
                                 fieldInfo, state,
                                 isIndexTerm);
@@ -512,6 +523,11 @@ public class PrefixCodedTermsReader exte
       }
 
       @Override
+      public long totalTermFreq() {
+        return state.totalTermFreq;
+      }
+
+      @Override
       public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
         final DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, skipDocs, reuse);
         assert docsEnum != null;

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java Sun Jan 16 02:25:24 2011
@@ -60,7 +60,7 @@ public class PrefixCodedTermsWriter exte
   final FieldInfos fieldInfos;
   FieldInfo currentField;
   private final TermsIndexWriterBase termsIndexWriter;
-  private final List<TermsConsumer> fields = new ArrayList<TermsConsumer>();
+  private final List<TermsWriter> fields = new ArrayList<TermsWriter>();
   private final Comparator<BytesRef> termComp;
 
   public PrefixCodedTermsWriter(
@@ -96,7 +96,7 @@ public class PrefixCodedTermsWriter exte
     assert currentField == null || currentField.name.compareTo(field.name) < 0;
     currentField = field;
     TermsIndexWriterBase.FieldWriter fieldIndexWriter = termsIndexWriter.addField(field);
-    TermsConsumer terms = new TermsWriter(fieldIndexWriter, field, postingsWriter);
+    final TermsWriter terms = new TermsWriter(fieldIndexWriter, field, postingsWriter);
     fields.add(terms);
     return terms;
   }
@@ -105,16 +105,26 @@ public class PrefixCodedTermsWriter exte
   public void close() throws IOException {
 
     try {
-      final int fieldCount = fields.size();
+      
+      int nonZeroCount = 0;
+      for(TermsWriter field : fields) {
+        if (field.numTerms > 0) {
+          nonZeroCount++;
+        }
+      }
 
       final long dirStart = out.getFilePointer();
 
-      out.writeInt(fieldCount);
-      for(int i=0;i<fieldCount;i++) {
-        TermsWriter field = (TermsWriter) fields.get(i);
-        out.writeInt(field.fieldInfo.number);
-        out.writeLong(field.numTerms);
-        out.writeLong(field.termsStartPointer);
+      out.writeVInt(nonZeroCount);
+      for(TermsWriter field : fields) {
+        if (field.numTerms > 0) {
+          out.writeVInt(field.fieldInfo.number);
+          out.writeVLong(field.numTerms);
+          out.writeVLong(field.termsStartPointer);
+          if (!field.fieldInfo.omitTermFreqAndPositions) {
+            out.writeVLong(field.sumTotalTermFreq);
+          }
+        }
       }
       writeTrailer(dirStart);
     } finally {
@@ -142,6 +152,7 @@ public class PrefixCodedTermsWriter exte
     private final long termsStartPointer;
     private long numTerms;
     private final TermsIndexWriterBase.FieldWriter fieldIndexWriter;
+    long sumTotalTermFreq;
 
     TermsWriter(
         TermsIndexWriterBase.FieldWriter fieldIndexWriter,
@@ -169,12 +180,12 @@ public class PrefixCodedTermsWriter exte
     }
 
     @Override
-    public void finishTerm(BytesRef text, int numDocs) throws IOException {
+    public void finishTerm(BytesRef text, TermStats stats) throws IOException {
 
-      assert numDocs > 0;
+      assert stats.docFreq > 0;
       //System.out.println("finishTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " fp="  + out.getFilePointer());
 
-      final boolean isIndexTerm = fieldIndexWriter.checkIndexTerm(text, numDocs);
+      final boolean isIndexTerm = fieldIndexWriter.checkIndexTerm(text, stats);
 
       termWriter.write(text);
       final int highBit = isIndexTerm ? 0x80 : 0;
@@ -182,23 +193,28 @@ public class PrefixCodedTermsWriter exte
 
       // This is a vInt, except, we steal top bit to record
       // whether this was an indexed term:
-      if ((numDocs & ~0x3F) == 0) {
+      if ((stats.docFreq & ~0x3F) == 0) {
         // Fast case -- docFreq fits in 6 bits
-        out.writeByte((byte) (highBit | numDocs));
+        out.writeByte((byte) (highBit | stats.docFreq));
       } else {
         // Write bottom 6 bits of docFreq, then write the
         // remainder as vInt:
-        out.writeByte((byte) (highBit | 0x40 | (numDocs & 0x3F)));
-        out.writeVInt(numDocs >>> 6);
+        out.writeByte((byte) (highBit | 0x40 | (stats.docFreq & 0x3F)));
+        out.writeVInt(stats.docFreq >>> 6);
+      }
+      if (!fieldInfo.omitTermFreqAndPositions) {
+        assert stats.totalTermFreq >= stats.docFreq;
+        out.writeVLong(stats.totalTermFreq - stats.docFreq);
       }
-      postingsWriter.finishTerm(numDocs, isIndexTerm);
+      postingsWriter.finishTerm(stats, isIndexTerm);
       numTerms++;
     }
 
     // Finishes all terms in this field
     @Override
-    public void finish() throws IOException {
+    public void finish(long sumTotalTermFreq) throws IOException {
       // EOF marker:
+      this.sumTotalTermFreq = sumTotalTermFreq;
       out.writeVInt(DeltaBytesWriter.TERM_EOF);
       fieldIndexWriter.finish();
     }

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java Sun Jan 16 02:25:24 2011
@@ -38,10 +38,10 @@ public abstract class TermsConsumer {
   public abstract PostingsConsumer startTerm(BytesRef text) throws IOException;
 
   /** Finishes the current term; numDocs must be > 0. */
-  public abstract void finishTerm(BytesRef text, int numDocs) throws IOException;
+  public abstract void finishTerm(BytesRef text, TermStats stats) throws IOException;
 
   /** Called when we are done adding terms to this field */
-  public abstract void finish() throws IOException;
+  public abstract void finish(long sumTotalTermFreq) throws IOException;
 
   /** Return the BytesRef Comparator used to sort terms
    *  before feeding to this API. */
@@ -55,6 +55,7 @@ public abstract class TermsConsumer {
 
     BytesRef term;
     assert termsEnum != null;
+    long sumTotalTermFreq = 0;
 
     if (mergeState.fieldInfo.omitTermFreqAndPositions) {
       if (docsEnum == null) {
@@ -69,9 +70,9 @@ public abstract class TermsConsumer {
         if (docsEnumIn != null) {
           docsEnum.reset(docsEnumIn);
           final PostingsConsumer postingsConsumer = startTerm(term);
-          final int numDocs = postingsConsumer.merge(mergeState, docsEnum);
-          if (numDocs > 0) {
-            finishTerm(term, numDocs);
+          final TermStats stats = postingsConsumer.merge(mergeState, docsEnum);
+          if (stats.docFreq > 0) {
+            finishTerm(term, stats);
           }
         }
       }
@@ -94,14 +95,15 @@ public abstract class TermsConsumer {
             }
           }
           final PostingsConsumer postingsConsumer = startTerm(term);
-          final int numDocs = postingsConsumer.merge(mergeState, postingsEnum);
-          if (numDocs > 0) {
-            finishTerm(term, numDocs);
+          final TermStats stats = postingsConsumer.merge(mergeState, postingsEnum);
+          if (stats.docFreq > 0) {
+            finishTerm(term, stats);
+            sumTotalTermFreq += stats.totalTermFreq;
           }
         }
       }
     }
 
-    finish();
+    finish(sumTotalTermFreq);
   }
 }

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java Sun Jan 16 02:25:24 2011
@@ -28,7 +28,7 @@ public abstract class TermsIndexWriterBa
   public abstract void setTermsOutput(IndexOutput out);
 
   public abstract class FieldWriter {
-    public abstract boolean checkIndexTerm(BytesRef text, int docFreq) throws IOException;
+    public abstract boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException;
     public abstract void finish() throws IOException;
   }
 

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java Sun Jan 16 02:25:24 2011
@@ -59,7 +59,7 @@ public class VariableGapTermsIndexWriter
   public static abstract class IndexTermSelector {
     // Called sequentially on every term being written,
     // returning true if this term should be indexed
-    public abstract boolean isIndexTerm(BytesRef term, int docFreq);
+    public abstract boolean isIndexTerm(BytesRef term, TermStats stats);
   }
 
   /** Same policy as {@link FixedGapTermsIndexWriter} */
@@ -74,7 +74,7 @@ public class VariableGapTermsIndexWriter
     }
 
     @Override
-    public boolean isIndexTerm(BytesRef term, int docFreq) {
+    public boolean isIndexTerm(BytesRef term, TermStats stats) {
       if (count >= interval) {
         count = 0;
         return true;
@@ -99,8 +99,8 @@ public class VariableGapTermsIndexWriter
     }
 
     @Override
-    public boolean isIndexTerm(BytesRef term, int docFreq) {
-      if (docFreq >= docFreqThresh || count >= interval) {
+    public boolean isIndexTerm(BytesRef term, TermStats stats) {
+      if (stats.docFreq >= docFreqThresh || count >= interval) {
         count = 0;
         return true;
       } else {
@@ -214,8 +214,8 @@ public class VariableGapTermsIndexWriter
     }
 
     @Override
-    public boolean checkIndexTerm(BytesRef text, int docFreq) throws IOException {
-      if (policy.isIndexTerm(text, docFreq) || first) {
+    public boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException {
+      if (policy.isIndexTerm(text, stats) || first) {
         first = false;
         //System.out.println("VGW: index term=" + text.utf8ToString() + " fp=" + termsOut.getFilePointer());
         final int lengthSave = text.length;

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java Sun Jan 16 02:25:24 2011
@@ -33,7 +33,6 @@ import org.apache.lucene.index.FieldsEnu
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.CompoundFileReader;
@@ -263,6 +262,11 @@ public class PreFlexFields extends Field
         return BytesRef.getUTF8SortedAsUTF16Comparator();
       }
     }
+
+    @Override
+    public long getSumTotalTermFreq() {
+      return -1;
+    }
   }
 
   private class PreTermsEnum extends TermsEnum {
@@ -939,6 +943,11 @@ public class PreFlexFields extends Field
     }
 
     @Override
+    public long totalTermFreq() {
+      return -1;
+    }
+
+    @Override
     public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
       PreDocsEnum docsEnum;
       if (reuse == null || !(reuse instanceof PreDocsEnum)) {

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java Sun Jan 16 02:25:24 2011
@@ -54,6 +54,7 @@ public class PulsingPostingsReaderImpl e
   public void init(IndexInput termsIn) throws IOException {
     CodecUtil.checkHeader(termsIn, PulsingPostingsWriterImpl.CODEC,
       PulsingPostingsWriterImpl.VERSION_START, PulsingPostingsWriterImpl.VERSION_START);
+    maxPositions = termsIn.readVInt();
     wrappedPostingsReader.init(termsIn);
   }
 
@@ -115,8 +116,10 @@ public class PulsingPostingsReaderImpl e
 
     termState.pendingIndexTerm |= isIndexTerm;
 
-    // TODO: wasteful to use whole byte for this (need just a 1 bit);
-    if (termsIn.readByte() == 1) {
+    // total TF, but in the omitTFAP case it's computed based on docFreq.
+    long count = fieldInfo.omitTermFreqAndPositions ? termState.docFreq : termState.totalTermFreq;
+    
+    if (count <= maxPositions) {
 
       // Inlined into terms dict -- just read the byte[] blob in,
       // but don't decode it now (we only decode when a DocsEnum

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java Sun Jan 16 02:25:24 2011
@@ -21,15 +21,16 @@ import java.io.IOException;
 
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.codecs.PostingsWriterBase;
+import org.apache.lucene.index.codecs.TermStats;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CodecUtil;
 
-// TODO: we now pulse entirely according to docFreq of the
-// term; it might be better to eg pulse by "net bytes used"
-// so that a term that has only 1 doc but zillions of
-// positions would not be inlined.  Though this is
+// TODO: we pulse based on total TF of the term;
+// it might be better to, e.g., pulse by "net bytes used"
+// so that a term that has only 1 posting but a huge
+// payload would not be inlined.  Though this is
 // presumably rare in practice...
 
 /** @lucene.experimental */
@@ -85,6 +86,7 @@ public final class PulsingPostingsWriter
   public void start(IndexOutput termsOut) throws IOException {
     this.termsOut = termsOut;
     CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
+    termsOut.writeVInt(pending.length); // encode maxPositions in header
     wrappedPostingsWriter.start(termsOut);
   }
 
@@ -177,7 +179,7 @@ public final class PulsingPostingsWriter
 
   /** Called when we are done adding docs to this term */
   @Override
-  public void finishTerm(int docCount, boolean isIndexTerm) throws IOException {
+  public void finishTerm(TermStats stats, boolean isIndexTerm) throws IOException {
     //System.out.println("PW   finishTerm docCount=" + docCount);
 
     assert pendingCount > 0 || pendingCount == -1;
@@ -185,8 +187,7 @@ public final class PulsingPostingsWriter
     pendingIsIndexTerm |= isIndexTerm;
 
     if (pendingCount == -1) {
-      termsOut.writeByte((byte) 0);
-      wrappedPostingsWriter.finishTerm(docCount, pendingIsIndexTerm);
+      wrappedPostingsWriter.finishTerm(stats, pendingIsIndexTerm);
       pendingIsIndexTerm = false;
     } else {
 
@@ -194,8 +195,6 @@ public final class PulsingPostingsWriter
       // term, so we fully inline our postings data into
       // terms dict, now:
 
-      termsOut.writeByte((byte) 1);
-
       // TODO: it'd be better to share this encoding logic
       // in some inner codec that knows how to write a
       // single doc / single position, etc.  This way if a

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java Sun Jan 16 02:25:24 2011
@@ -25,6 +25,7 @@ import org.apache.lucene.index.FieldInfo
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.codecs.PostingsWriterBase;
+import org.apache.lucene.index.codecs.TermStats;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CodecUtil;
@@ -239,11 +240,11 @@ public final class SepPostingsWriterImpl
 
   /** Called when we are done adding docs to this term */
   @Override
-  public void finishTerm(int docCount, boolean isIndexTerm) throws IOException {
+  public void finishTerm(TermStats stats, boolean isIndexTerm) throws IOException {
 
     // TODO: -- wasteful we are counting this in two places?
-    assert docCount > 0;
-    assert docCount == df;
+    assert stats.docFreq > 0;
+    assert stats.docFreq == df;
 
     docIndex.write(termsOut, isIndexTerm);
 

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java Sun Jan 16 02:25:24 2011
@@ -21,7 +21,6 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.index.codecs.FieldsProducer;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.FieldsEnum;
-import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.DocsAndPositionsEnum;
@@ -119,28 +118,31 @@ class SimpleTextFieldsReader extends Fie
     private final IndexInput in;
     private final boolean omitTF;
     private int docFreq;
+    private long totalTermFreq;
     private long docsStart;
     private boolean ended;
-    private final BytesRefFSTEnum<PairOutputs.Pair<Long,Long>> fstEnum;
+    private final BytesRefFSTEnum<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fstEnum;
 
-    public SimpleTextTermsEnum(FST<PairOutputs.Pair<Long,Long>> fst, boolean omitTF) throws IOException {
+    public SimpleTextTermsEnum(FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst, boolean omitTF) throws IOException {
       this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
       this.omitTF = omitTF;
-      fstEnum = new BytesRefFSTEnum<PairOutputs.Pair<Long,Long>>(fst);
+      fstEnum = new BytesRefFSTEnum<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(fst);
     }
 
     public SeekStatus seek(BytesRef text, boolean useCache /* ignored */) throws IOException {
 
       //System.out.println("seek to text=" + text.utf8ToString());
-      final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,Long>> result = fstEnum.seekCeil(text);
+      final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> result = fstEnum.seekCeil(text);
       if (result == null) {
         //System.out.println("  end");
         return SeekStatus.END;
       } else {
         //System.out.println("  got text=" + term.utf8ToString());
-        PairOutputs.Pair<Long,Long> pair = result.output;
-        docsStart = pair.output1;
-        docFreq = pair.output2.intValue();
+        PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> pair1 = result.output;
+        PairOutputs.Pair<Long,Long> pair2 = pair1.output2;
+        docsStart = pair1.output1;
+        docFreq = pair2.output1.intValue();
+        totalTermFreq = pair2.output2;
 
         if (result.input.equals(text)) {
           //System.out.println("  match docsStart=" + docsStart);
@@ -155,11 +157,13 @@ class SimpleTextFieldsReader extends Fie
     @Override
     public BytesRef next() throws IOException {
       assert !ended;
-      final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,Long>> result = fstEnum.next();
+      final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> result = fstEnum.next();
       if (result != null) {
-        final PairOutputs.Pair<Long,Long> pair = result.output;
-        docsStart = pair.output1;
-        docFreq = pair.output2.intValue();
+        PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> pair1 = result.output;
+        PairOutputs.Pair<Long,Long> pair2 = pair1.output2;
+        docsStart = pair1.output1;
+        docFreq = pair2.output1.intValue();
+        totalTermFreq = pair2.output2;
         return result.input;
       } else {
         return null;
@@ -187,6 +191,11 @@ class SimpleTextFieldsReader extends Fie
     }
 
     @Override
+    public long totalTermFreq() {
+      return totalTermFreq;
+    }
+ 
+    @Override
     public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
       SimpleTextDocsEnum docsEnum;
       if (reuse != null && reuse instanceof SimpleTextDocsEnum && ((SimpleTextDocsEnum) reuse).canReuse(in)) {
@@ -438,8 +447,9 @@ class SimpleTextFieldsReader extends Fie
   private class SimpleTextTerms extends Terms {
     private final long termsStart;
     private final boolean omitTF;
-    private FST<PairOutputs.Pair<Long,Long>> fst;
-
+    private long sumTotalTermFreq;
+    private FST<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fst;
+    private int termCount;
     private final BytesRef scratch = new BytesRef(10);
 
     public SimpleTextTerms(String field, long termsStart) throws IOException {
@@ -450,24 +460,38 @@ class SimpleTextFieldsReader extends Fie
 
     private void loadTerms() throws IOException {
       PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton(false);
-      Builder<PairOutputs.Pair<Long,Long>> b = new Builder<PairOutputs.Pair<Long,Long>>(FST.INPUT_TYPE.BYTE1, 0, 0, true, new PairOutputs<Long,Long>(posIntOutputs, posIntOutputs));
+      final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> b;
+      b = new Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(FST.INPUT_TYPE.BYTE1,
+                                                                          0,
+                                                                          0,
+                                                                          true,
+                                                                          new PairOutputs<Long,PairOutputs.Pair<Long,Long>>(posIntOutputs,
+                                                                                                                            new PairOutputs<Long,Long>(posIntOutputs, posIntOutputs)));
       IndexInput in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
       in.seek(termsStart);
       final BytesRef lastTerm = new BytesRef(10);
       long lastDocsStart = -1;
       int docFreq = 0;
+      long totalTermFreq = 0;
       while(true) {
         readLine(in, scratch);
         if (scratch.equals(END) || scratch.startsWith(FIELD)) {
           if (lastDocsStart != -1) {
-            b.add(lastTerm, new PairOutputs.Pair<Long,Long>(lastDocsStart, Long.valueOf(docFreq)));
+            b.add(lastTerm, new PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>(lastDocsStart,
+                                                                                   new PairOutputs.Pair<Long,Long>((long) docFreq,
+                                                                                                                   posIntOutputs.get(totalTermFreq))));
+            sumTotalTermFreq += totalTermFreq;
           }
           break;
         } else if (scratch.startsWith(DOC)) {
           docFreq++;
+        } else if (scratch.startsWith(POS)) {
+          totalTermFreq++;
         } else if (scratch.startsWith(TERM)) {
           if (lastDocsStart != -1) {
-            b.add(lastTerm, new PairOutputs.Pair<Long,Long>(lastDocsStart, Long.valueOf(docFreq)));
+            b.add(lastTerm, new PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>(lastDocsStart,
+                                                                                   new PairOutputs.Pair<Long,Long>((long) docFreq,
+                                                                                                                   posIntOutputs.get(totalTermFreq))));
           }
           lastDocsStart = in.getFilePointer();
           final int len = scratch.length - TERM.length;
@@ -477,6 +501,9 @@ class SimpleTextFieldsReader extends Fie
           System.arraycopy(scratch.bytes, TERM.length, lastTerm.bytes, 0, len);
           lastTerm.length = len;
           docFreq = 0;
+          sumTotalTermFreq += totalTermFreq;
+          totalTermFreq = 0;
+          termCount++;
         }
       }
       fst = b.finish();
@@ -502,6 +529,16 @@ class SimpleTextFieldsReader extends Fie
     public Comparator<BytesRef> getComparator() {
       return BytesRef.getUTF8SortedAsUnicodeComparator();
     }
+
+    @Override
+    public long getUniqueTermCount() {
+      return (long) termCount;
+    }
+
+    @Override
+    public long getSumTotalTermFreq() {
+      return sumTotalTermFreq;
+    }
   }
 
   @Override

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java Sun Jan 16 02:25:24 2011
@@ -22,6 +22,7 @@ import org.apache.lucene.util.UnicodeUti
 import org.apache.lucene.index.codecs.FieldsConsumer;
 import org.apache.lucene.index.codecs.TermsConsumer;
 import org.apache.lucene.index.codecs.PostingsConsumer;
+import org.apache.lucene.index.codecs.TermStats;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.store.IndexOutput;
@@ -84,11 +85,11 @@ class SimpleTextFieldsWriter extends Fie
     }
 
     @Override
-    public void finishTerm(BytesRef term, int numDocs) throws IOException {
+    public void finishTerm(BytesRef term, TermStats stats) throws IOException {
     }
 
     @Override
-    public void finish() throws IOException {
+    public void finish(long sumTotalTermFreq) throws IOException {
     }
 
     @Override

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java Sun Jan 16 02:25:24 2011
@@ -28,6 +28,7 @@ import org.apache.lucene.index.SegmentWr
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.codecs.PostingsWriterBase;
+import org.apache.lucene.index.codecs.TermStats;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CodecUtil;
 
@@ -184,12 +185,12 @@ public final class StandardPostingsWrite
 
   /** Called when we are done adding docs to this term */
   @Override
-  public void finishTerm(int docCount, boolean isIndexTerm) throws IOException {
-    assert docCount > 0;
+  public void finishTerm(TermStats stats, boolean isIndexTerm) throws IOException {
+    assert stats.docFreq > 0;
 
     // TODO: wasteful we are counting this (counting # docs
     // for this term) in two places?
-    assert docCount == df;
+    assert stats.docFreq == df;
 
     if (isIndexTerm) {
       // Write absolute at seek points

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java Sun Jan 16 02:25:24 2011
@@ -20,7 +20,6 @@ package org.apache.lucene.search;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.search.Weight.ScorerContext;
 import org.apache.lucene.util.ToStringUtils;
 
 import java.io.IOException;

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java Sun Jan 16 02:25:24 2011
@@ -126,6 +126,11 @@ public abstract class FilteredTermsEnum 
     return tenum.docFreq();
   }
 
+  @Override
+  public long totalTermFreq() {
+    return tenum.totalTermFreq();
+  }
+
   /** This enum does not support seeking!
    * @throws UnsupportedOperationException
    */

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java?rev=1059434&r1=1059433&r2=1059434&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java Sun Jan 16 02:25:24 2011
@@ -244,6 +244,11 @@ public final class FuzzyTermsEnum extend
   public int docFreq() {
     return actualEnum.docFreq();
   }
+
+  @Override
+  public long totalTermFreq() {
+    return actualEnum.totalTermFreq();
+  }
   
   @Override
   public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {