You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/08/10 16:16:21 UTC

svn commit: r1371709 - in /lucene/dev/trunk/lucene: ./ core/src/java/org/apache/lucene/codecs/ core/src/java/org/apache/lucene/codecs/bloom/ core/src/java/org/apache/lucene/codecs/lucene40/ core/src/java/org/apache/lucene/codecs/memory/ core/src/java/o...

Author: rmuir
Date: Fri Aug 10 14:16:20 2012
New Revision: 1371709

URL: http://svn.apache.org/viewvc?rev=1371709&view=rev
Log:
LUCENE-4299: add Terms hasPositions/hasOffsets, so you know what features a docs TVs have

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/Terms.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java
    lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
    lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java
    lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
    lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Aug 10 14:16:20 2012
@@ -8,6 +8,13 @@ http://s.apache.org/luceneversions
 
 ======================= Lucene 4.0.0 =======================
 
+API Changes
+
+* LUCENE-4299: Added Terms.hasPositions() and Terms.hasOffsets().
+  Previously you had no real way to know that a term vector field
+  had positions or offsets, since this can be configured on a 
+  per-field-per-document basis. (Robert Muir)
+
 Bug Fixes
 
 * LUCENE-4297: BooleanScorer2 would multiply the coord() factor

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java Fri Aug 10 14:16:20 2012
@@ -254,6 +254,16 @@ public class BlockTermsReader extends Fi
     }
 
     @Override
+    public boolean hasOffsets() {
+      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+    }
+
+    @Override
+    public boolean hasPositions() {
+      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+    }
+
+    @Override
     public long size() {
       return numTerms;
     }

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java Fri Aug 10 14:16:20 2012
@@ -457,6 +457,16 @@ public class BlockTreeTermsReader extend
     }
 
     @Override
+    public boolean hasOffsets() {
+      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+    }
+
+    @Override
+    public boolean hasPositions() {
+      return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+    }
+
+    @Override
     public TermsEnum iterator(TermsEnum reuse) throws IOException {
       return new SegmentTermsEnum();
     }

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java Fri Aug 10 14:16:20 2012
@@ -184,6 +184,9 @@ public abstract class TermVectorsWriter 
     final FieldsEnum fieldsEnum = vectors.iterator();
     String fieldName;
     String lastFieldName = null;
+    
+    TermsEnum termsEnum = null;
+    DocsAndPositionsEnum docsAndPositionsEnum = null;
 
     while((fieldName = fieldsEnum.next()) != null) {
       final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldName);
@@ -196,39 +199,30 @@ public abstract class TermVectorsWriter 
         // FieldsEnum shouldn't lie...
         continue;
       }
+      
+      final boolean hasPositions = terms.hasPositions();
+      final boolean hasOffsets = terms.hasOffsets();
+      
       final int numTerms = (int) terms.size();
       if (numTerms == -1) {
         throw new IllegalStateException("terms.size() must be implemented (it returned -1)");
       }
-      final TermsEnum termsEnum = terms.iterator(null);
-
-      DocsAndPositionsEnum docsAndPositionsEnum = null;
-
-      boolean startedField = false;
-
-      // NOTE: this is tricky, because TermVectors allow
-      // indexing offsets but NOT positions.  So we must
-      // lazily init the field by checking whether first
-      // position we see is -1 or not.
+      
+      startField(fieldInfo, numTerms, hasPositions, hasOffsets);
+      termsEnum = terms.iterator(termsEnum);
 
       int termCount = 0;
       while(termsEnum.next() != null) {
         termCount++;
 
         final int freq = (int) termsEnum.totalTermFreq();
+        
+        startTerm(termsEnum.term(), freq);
 
-        if (startedField) {
-          startTerm(termsEnum.term(), freq);
-        }
-
-        // TODO: we need a "query" API where we can ask (via
-        // flex API) what this term was indexed with...
-        // Both positions & offsets:
-        docsAndPositionsEnum = termsEnum.docsAndPositions(null, null);
-        boolean hasOffsets = false;
-        boolean hasPositions = false;
-
-        if (docsAndPositionsEnum != null) {
+        if (hasPositions || hasOffsets) {
+          docsAndPositionsEnum = termsEnum.docsAndPositions(null, docsAndPositionsEnum);
+          assert docsAndPositionsEnum != null;
+          
           final int docID = docsAndPositionsEnum.nextDoc();
           assert docID != DocIdSetIterator.NO_MORE_DOCS;
           assert docsAndPositionsEnum.freq() == freq;
@@ -237,28 +231,10 @@ public abstract class TermVectorsWriter 
             final int pos = docsAndPositionsEnum.nextPosition();
             final int startOffset = docsAndPositionsEnum.startOffset();
             final int endOffset = docsAndPositionsEnum.endOffset();
-            if (!startedField) {
-              assert numTerms > 0;
-              hasPositions = pos != -1;
-              hasOffsets = startOffset != -1;
-              startField(fieldInfo, numTerms, hasPositions, hasOffsets);
-              startTerm(termsEnum.term(), freq);
-              startedField = true;
-            }
-            if (hasOffsets) {
-              assert startOffset != -1;
-              assert endOffset != -1;
-            }
+
             assert !hasPositions || pos >= 0;
             addPosition(pos, startOffset, endOffset);
           }
-        } else {
-          if (!startedField) {
-            assert numTerms > 0;
-            startField(fieldInfo, numTerms, hasPositions, hasOffsets);
-            startTerm(termsEnum.term(), freq);
-            startedField = true;
-          }
         }
       }
       assert termCount == numTerms;

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java Fri Aug 10 14:16:20 2012
@@ -314,6 +314,16 @@ public class BloomFilteringPostingsForma
       public int getDocCount() throws IOException {
         return delegateTerms.getDocCount();
       }
+
+      @Override
+      public boolean hasOffsets() {
+        return delegateTerms.hasOffsets();
+      }
+
+      @Override
+      public boolean hasPositions() {
+        return delegateTerms.hasPositions();
+      }
     }
     
     class BloomFilteredTermsEnum extends TermsEnum {

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java Fri Aug 10 14:16:20 2012
@@ -296,10 +296,16 @@ public class Lucene40TermVectorsReader e
   private class TVTerms extends Terms {
     private final int numTerms;
     private final long tvfFPStart;
+    private final boolean storePositions;
+    private final boolean storeOffsets;
+
 
     public TVTerms(long tvfFP) throws IOException {
       tvf.seek(tvfFP);
       numTerms = tvf.readVInt();
+      final byte bits = tvf.readByte();
+      storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
+      storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
       tvfFPStart = tvf.getFilePointer();
     }
 
@@ -314,7 +320,7 @@ public class Lucene40TermVectorsReader e
       } else {
         termsEnum = new TVTermsEnum();
       }
-      termsEnum.reset(numTerms, tvfFPStart);
+      termsEnum.reset(numTerms, tvfFPStart, storePositions, storeOffsets);
       return termsEnum;
     }
 
@@ -345,6 +351,16 @@ public class Lucene40TermVectorsReader e
       // this...?  I guess codec could buffer and re-sort...
       return BytesRef.getUTF8SortedAsUnicodeComparator();
     }
+
+    @Override
+    public boolean hasOffsets() {
+      return storeOffsets;
+    }
+
+    @Override
+    public boolean hasPositions() {
+      return storePositions;
+    }
   }
 
   private class TVTermsEnum extends TermsEnum {
@@ -373,13 +389,12 @@ public class Lucene40TermVectorsReader e
       return tvf == origTVF;
     }
 
-    public void reset(int numTerms, long tvfFPStart) throws IOException {
+    public void reset(int numTerms, long tvfFPStart, boolean storePositions, boolean storeOffsets) throws IOException {
       this.numTerms = numTerms;
+      this.storePositions = storePositions;
+      this.storeOffsets = storeOffsets;
       nextTerm = 0;
       tvf.seek(tvfFPStart);
-      final byte bits = tvf.readByte();
-      storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
-      storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
       tvfFP = 1+tvfFPStart;
       positions = null;
       startOffsets = null;

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java Fri Aug 10 14:16:20 2012
@@ -635,6 +635,16 @@ public class DirectPostingsFormat extend
       return BytesRef.getUTF8SortedAsUnicodeComparator();
     }
 
+    @Override
+    public boolean hasOffsets() {
+      return hasOffsets;
+    }
+
+    @Override
+    public boolean hasPositions() {
+      return hasPos;
+    }
+
     private final class DirectTermsEnum extends TermsEnum {
 
       private final BytesRef scratch = new BytesRef();

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java Fri Aug 10 14:16:20 2012
@@ -834,6 +834,16 @@ public class MemoryPostingsFormat extend
     public Comparator<BytesRef> getComparator() {
       return BytesRef.getUTF8SortedAsUnicodeComparator();
     }
+
+    @Override
+    public boolean hasOffsets() {
+      return field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+    }
+
+    @Override
+    public boolean hasPositions() {
+      return field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+    }
   }
 
   @Override

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java Fri Aug 10 14:16:20 2012
@@ -609,6 +609,16 @@ class SimpleTextFieldsReader extends Fie
     public int getDocCount() throws IOException {
       return docCount;
     }
+
+    @Override
+    public boolean hasOffsets() {
+      return indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+    }
+
+    @Override
+    public boolean hasPositions() {
+      return indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+    }
   }
 
   @Override

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java Fri Aug 10 14:16:20 2012
@@ -130,7 +130,7 @@ public class SimpleTextTermVectorsReader
       assert StringHelper.startsWith(scratch, FIELDTERMCOUNT);
       int termCount = parseIntAt(FIELDTERMCOUNT.length);
       
-      SimpleTVTerms terms = new SimpleTVTerms();
+      SimpleTVTerms terms = new SimpleTVTerms(offsets, positions);
       fields.put(fieldName, terms);
       
       for (int j = 0; j < termCount; j++) {
@@ -257,8 +257,12 @@ public class SimpleTextTermVectorsReader
   
   private static class SimpleTVTerms extends Terms {
     final SortedMap<BytesRef,SimpleTVPostings> terms;
+    final boolean hasOffsets;
+    final boolean hasPositions;
     
-    SimpleTVTerms() {
+    SimpleTVTerms(boolean hasOffsets, boolean hasPositions) {
+      this.hasOffsets = hasOffsets;
+      this.hasPositions = hasPositions;
       terms = new TreeMap<BytesRef,SimpleTVPostings>();
     }
     
@@ -292,6 +296,16 @@ public class SimpleTextTermVectorsReader
     public int getDocCount() throws IOException {
       return 1;
     }
+
+    @Override
+    public boolean hasOffsets() {
+      return hasOffsets;
+    }
+
+    @Override
+    public boolean hasPositions() {
+      return hasPositions;
+    }
   }
   
   private static class SimpleTVPostings {

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Fri Aug 10 14:16:20 2012
@@ -718,6 +718,11 @@ public class CheckIndex {
         continue;
       }
       
+      final boolean hasPositions = terms.hasPositions();
+      final boolean hasOffsets = terms.hasOffsets();
+      // term vectors cannot omit TF
+      final boolean hasFreqs = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+
       final TermsEnum termsEnum = terms.iterator(null);
       
       boolean hasOrd = true;
@@ -777,17 +782,10 @@ public class CheckIndex {
         status.termCount++;
         
         final DocsEnum docs2;
-        final boolean hasPositions;
-        // if we are checking vectors, we have freqs implicitly
-        final boolean hasFreqs = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
-        // if we are checking vectors, offsets are a free-for-all anyway
-        final boolean hasOffsets = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
         if (postings != null) {
           docs2 = postings;
-          hasPositions = true;
         } else {
           docs2 = docs;
-          hasPositions = false;
         }
         
         int lastDoc = -1;
@@ -827,10 +825,7 @@ public class CheckIndex {
               // NOTE: pos=-1 is allowed because of ancient bug
               // (LUCENE-1542) whereby IndexWriter could
               // write pos=-1 when first token's posInc is 0
-              // (separately: analyzers should not give
-              // posInc=0 to first token); also, term
-              // vectors are allowed to return pos=-1 if
-              // they indexed offset but not positions:
+
               if (pos < -1) {
                 throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
               }
@@ -1439,19 +1434,18 @@ public class CheckIndex {
               }
               postingsTermsEnum = postingsTerms.iterator(postingsTermsEnum);
               
+              final boolean hasProx = terms.hasOffsets() || terms.hasPositions();
               BytesRef term = null;
               while ((term = termsEnum.next()) != null) {
-                
-                final boolean hasProx;
 
-                // Try positions:
-                postings = termsEnum.docsAndPositions(null, postings);
-                if (postings == null) {
-                  hasProx = false;
-                  // Try docIDs & freqs:
-                  docs = termsEnum.docs(null, docs);
+                if (hasProx) {
+                  postings = termsEnum.docsAndPositions(null, postings);
+                  assert postings != null;
+                  docs = null;
                 } else {
-                  hasProx = true;
+                  docs = termsEnum.docs(null, docs);
+                  assert docs != null;
+                  postings = null;
                 }
 
                 final DocsEnum docs2;

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java Fri Aug 10 14:16:20 2012
@@ -109,6 +109,16 @@ public class FilterAtomicReader extends 
     public TermsEnum intersect(CompiledAutomaton automaton, BytesRef bytes) throws java.io.IOException {
       return in.intersect(automaton, bytes);
     }
+
+    @Override
+    public boolean hasOffsets() {
+      return in.hasOffsets();
+    }
+
+    @Override
+    public boolean hasPositions() {
+      return in.hasPositions();
+    }
   }
 
   /** Base class for filtering {@link TermsEnum} implementations. */

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java Fri Aug 10 14:16:20 2012
@@ -37,12 +37,17 @@ public final class MultiTerms extends Te
   private final Terms[] subs;
   private final ReaderSlice[] subSlices;
   private final Comparator<BytesRef> termComp;
+  private final boolean hasOffsets;
+  private final boolean hasPositions;
 
   public MultiTerms(Terms[] subs, ReaderSlice[] subSlices) throws IOException {
     this.subs = subs;
     this.subSlices = subSlices;
     
     Comparator<BytesRef> _termComp = null;
+    assert subs.length > 0 : "inefficient: don't use MultiTerms over one sub";
+    boolean _hasOffsets = true;
+    boolean _hasPositions = true;
     for(int i=0;i<subs.length;i++) {
       if (_termComp == null) {
         _termComp = subs[i].getComparator();
@@ -54,9 +59,13 @@ public final class MultiTerms extends Te
           throw new IllegalStateException("sub-readers have different BytesRef.Comparators; cannot merge");
         }
       }
+      _hasOffsets &= subs[i].hasOffsets();
+      _hasPositions &= subs[i].hasPositions();
     }
 
     termComp = _termComp;
+    hasOffsets = _hasOffsets;
+    hasPositions = _hasPositions;
   }
 
   @Override
@@ -142,5 +151,15 @@ public final class MultiTerms extends Te
   public Comparator<BytesRef> getComparator() {
     return termComp;
   }
+
+  @Override
+  public boolean hasOffsets() {
+    return hasOffsets;
+  }
+
+  @Override
+  public boolean hasPositions() {
+    return hasPositions;
+  }
 }
 

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/Terms.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/Terms.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/Terms.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/Terms.java Fri Aug 10 14:16:20 2012
@@ -104,6 +104,12 @@ public abstract class Terms {
    *  measures, this measure does not take deleted documents
    *  into account. */
   public abstract int getDocCount() throws IOException;
+  
+  /** Returns true if documents in this field store offsets. */
+  public abstract boolean hasOffsets();
+  
+  /** Returns true if documents in this field store positions. */
+  public abstract boolean hasPositions();
 
   public final static Terms[] EMPTY_ARRAY = new Terms[0];
 }

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java Fri Aug 10 14:16:20 2012
@@ -207,6 +207,8 @@ public class TestDuelingCodecs extends L
       return;
     }
     assertTermsStatistics(leftTerms, rightTerms);
+    assertEquals(leftTerms.hasOffsets(), rightTerms.hasOffsets());
+    assertEquals(leftTerms.hasPositions(), rightTerms.hasPositions());
 
     TermsEnum leftTermsEnum = leftTerms.iterator(null);
     TermsEnum rightTermsEnum = rightTerms.iterator(null);

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java Fri Aug 10 14:16:20 2012
@@ -122,6 +122,16 @@ public final class FieldCacheRewriteMeth
         public long size() {
           return -1;
         }
+
+        @Override
+        public boolean hasOffsets() {
+          return false;
+        }
+
+        @Override
+        public boolean hasPositions() {
+          return false;
+        }
       });
       
       assert termsEnum != null;

Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java Fri Aug 10 14:16:20 2012
@@ -125,20 +125,7 @@ public class TokenSources {
   }
 
   private static boolean hasPositions(Terms vector) throws IOException {
-    final TermsEnum termsEnum = vector.iterator(null);
-    if (termsEnum.next() != null) {
-      DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_PAYLOADS);
-      if (dpEnum != null) {
-        int doc = dpEnum.nextDoc();
-        assert doc >= 0 && doc != DocIdSetIterator.NO_MORE_DOCS;
-        int pos = dpEnum.nextPosition();
-        if (pos >= 0) {
-          return true;
-        }
-      }
-    }
-
-    return false;
+    return vector.hasPositions();
   }
 
   /**

Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java Fri Aug 10 14:16:20 2012
@@ -56,18 +56,17 @@ public final class TokenStreamFromTermPo
     termAttribute = addAttribute(CharTermAttribute.class);
     positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
     offsetAttribute = addAttribute(OffsetAttribute.class);
+    final boolean hasOffsets = vector.hasOffsets();
     final TermsEnum termsEnum = vector.iterator(null);
     BytesRef text;
     DocsAndPositionsEnum dpEnum = null;
     while((text = termsEnum.next()) != null) {
       dpEnum = termsEnum.docsAndPositions(null, dpEnum);
       assert dpEnum != null; // presumably checked by TokenSources.hasPositions earlier
-      boolean hasOffsets = true;
       dpEnum.nextDoc();
       final int freq = dpEnum.freq();
       for (int j = 0; j < freq; j++) {
         int pos = dpEnum.nextPosition();
-        hasOffsets &= dpEnum.startOffset() >= 0;
         Token token;
         if (hasOffsets) {
           token = new Token(text.utf8ToString(),

Modified: lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Fri Aug 10 14:16:20 2012
@@ -778,8 +778,16 @@ public class MemoryIndex {
             public int getDocCount() {
               return info.sortedTerms.length > 0 ? 1 : 0;
             }
-              
-              
+
+            @Override
+            public boolean hasOffsets() {
+              return stride == 3;
+            }
+
+            @Override
+            public boolean hasPositions() {
+              return true;
+            }  
           };
         }
       }

Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java?rev=1371709&r1=1371708&r2=1371709&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java Fri Aug 10 14:16:20 2012
@@ -127,9 +127,11 @@ public class RAMOnlyPostingsFormat exten
     long sumTotalTermFreq;
     long sumDocFreq;
     int docCount;
+    final FieldInfo.IndexOptions options;
 
-    RAMField(String field) {
+    RAMField(String field, FieldInfo.IndexOptions options) {
       this.field = field;
+      this.options = options;
     }
 
     @Override
@@ -161,6 +163,16 @@ public class RAMOnlyPostingsFormat exten
     public Comparator<BytesRef> getComparator() {
       return reverseUnicodeComparator;
     }
+
+    @Override
+    public boolean hasOffsets() {
+      return options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+    }
+
+    @Override
+    public boolean hasPositions() {
+      return options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+    }
   }
 
   static class RAMTerm {
@@ -198,7 +210,7 @@ public class RAMOnlyPostingsFormat exten
       if (field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
         throw new UnsupportedOperationException("this codec cannot index offsets");
       }
-      RAMField ramField = new RAMField(field.name);
+      RAMField ramField = new RAMField(field.name, field.getIndexOptions());
       postings.fieldToTerms.put(field.name, ramField);
       termsConsumer.reset(ramField);
       return termsConsumer;