You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2012/01/23 23:15:17 UTC

svn commit: r1235028 [1/3] - in /lucene/dev/branches/lucene2858: ./ dev-tools/maven/solr/core/ lucene/ lucene/src/java/org/apache/lucene/codecs/lucene3x/ lucene/src/java/org/apache/lucene/codecs/lucene40/ lucene/src/test-framework/java/org/apache/lucen...

Author: uschindler
Date: Mon Jan 23 22:15:15 2012
New Revision: 1235028

URL: http://svn.apache.org/viewvc?rev=1235028&view=rev
Log:
LUCENE-2858: Reverse merged revision(s) 1-0 from lucene/dev/trunk

Added:
    lucene/dev/branches/lucene2858/lucene/src/test-framework/java/org/apache/lucene/analysis/MockCharFilter.java
      - copied unchanged from r1235026, lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/analysis/MockCharFilter.java
    lucene/dev/branches/lucene2858/lucene/src/test/org/apache/lucene/analysis/TestMockCharFilter.java
      - copied unchanged from r1235026, lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/TestMockCharFilter.java
    lucene/dev/branches/lucene2858/solr/core/src/java/org/apache/solr/analysis/TypeTokenFilterFactory.java
      - copied unchanged from r1235026, lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TypeTokenFilterFactory.java
    lucene/dev/branches/lucene2858/solr/core/src/test-files/solr/conf/stoptypes-1.txt
      - copied unchanged from r1235026, lucene/dev/trunk/solr/core/src/test-files/solr/conf/stoptypes-1.txt
    lucene/dev/branches/lucene2858/solr/core/src/test-files/solr/conf/stoptypes-2.txt
      - copied unchanged from r1235026, lucene/dev/trunk/solr/core/src/test-files/solr/conf/stoptypes-2.txt
    lucene/dev/branches/lucene2858/solr/core/src/test/org/apache/solr/analysis/TestTypeTokenFilterFactory.java
      - copied unchanged from r1235026, lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestTypeTokenFilterFactory.java
Modified:
    lucene/dev/branches/lucene2858/   (props changed)
    lucene/dev/branches/lucene2858/dev-tools/maven/solr/core/pom.xml.template
    lucene/dev/branches/lucene2858/lucene/   (props changed)
    lucene/dev/branches/lucene2858/lucene/CHANGES.txt
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermDocs.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java
    lucene/dev/branches/lucene2858/lucene/src/test-framework/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
    lucene/dev/branches/lucene2858/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWFieldsWriter.java
    lucene/dev/branches/lucene2858/lucene/src/test/org/apache/lucene/index/TestPostingsOffsets.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
    lucene/dev/branches/lucene2858/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilter.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex
    lucene/dev/branches/lucene2858/modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/test/org/apache/lucene/analysis/cn/TestChineseTokenizer.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/PatternAnalyzerTest.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestASCIIFoldingFilter.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestHyphenatedWordsFilter.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestSegmentingTokenizerBase.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java
    lucene/dev/branches/lucene2858/modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
    lucene/dev/branches/lucene2858/modules/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
    lucene/dev/branches/lucene2858/modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilterTest.java
    lucene/dev/branches/lucene2858/modules/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
    lucene/dev/branches/lucene2858/solr/   (props changed)
    lucene/dev/branches/lucene2858/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/lucene2858/solr/core/   (props changed)
    lucene/dev/branches/lucene2858/solr/core/src/java/   (props changed)
    lucene/dev/branches/lucene2858/solr/core/src/test/   (props changed)

Modified: lucene/dev/branches/lucene2858/dev-tools/maven/solr/core/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/dev-tools/maven/solr/core/pom.xml.template?rev=1235028&r1=1235027&r2=1235028&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/dev-tools/maven/solr/core/pom.xml.template (original)
+++ lucene/dev/branches/lucene2858/dev-tools/maven/solr/core/pom.xml.template Mon Jan 23 22:15:15 2012
@@ -203,6 +203,12 @@
         <directory>src/test-files</directory>
       </testResource>
       <testResource>
+        <directory>${project.build.testSourceDirectory}</directory>
+        <excludes>
+          <exclude>**/*.java</exclude>
+        </excludes>
+      </testResource>
+      <testResource>
         <directory>../solrj/src/test-files</directory>
       </testResource>
     </testResources>

Modified: lucene/dev/branches/lucene2858/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/CHANGES.txt?rev=1235028&r1=1235027&r2=1235028&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene2858/lucene/CHANGES.txt Mon Jan 23 22:15:15 2012
@@ -790,7 +790,7 @@ New Features
   input mapping to it) for FSTs that have strictly monotonic long
   outputs (such as an ord).  (Mike McCandless)
   
-* LUCENE-3121: Add TypeTokenFilter that filters tokens based on
+* LUCENE-3671: Add TypeTokenFilter that filters tokens based on
   their TypeAttribute.  (Tommaso Teofili via Uwe Schindler)
 
 * LUCENE-3690: Added HTMLStripCharFilter, a CharFilter that strips HTML
@@ -814,9 +814,11 @@ Bug fixes
 * LUCENE-3641: Fixed MultiReader to correctly propagate readerFinishedListeners
   to clones/reopened readers.  (Uwe Schindler)
 
-* LUCENE-3642: Fixed bugs in CharTokenizer, n-gram filters, and smart chinese 
-  where they would create invalid offsets in some situations, leading to problems
-  in highlighting. (Max Beutel via Robert Muir)
+* LUCENE-3642, SOLR-2891, LUCENE-3717: Fixed bugs in CharTokenizer, n-gram filters, 
+  compound token filters, thai word filter, icutokenizer, pattern analyzer, 
+  wikipediatokenizer, and smart chinese where they would create invalid offsets in 
+  some situations, leading to problems in highlighting.  
+  (Max Beutel, Edwin Steiner via Robert Muir)
 
 * LUCENE-3639: TopDocs.merge was incorrectly setting TopDocs.maxScore to
   Float.MIN_VALUE when it should be Float.NaN, when there were 0

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermDocs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermDocs.java?rev=1235028&r1=1235027&r2=1235028&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermDocs.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene3x/SegmentTermDocs.java Mon Jan 23 22:15:15 2012
@@ -206,7 +206,7 @@ public class SegmentTermDocs {
         skipListReader = new Lucene40SkipListReader((IndexInput) freqStream.clone(), maxSkipLevels, skipInterval); // lazily clone
 
       if (!haveSkipped) {                          // lazily initialize skip stream
-        skipListReader.init(skipPointer, freqBasePointer, proxBasePointer, df, currentFieldStoresPayloads);
+        skipListReader.init(skipPointer, freqBasePointer, proxBasePointer, df, currentFieldStoresPayloads, false);
         haveSkipped = true;
       }
 

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java?rev=1235028&r1=1235027&r2=1235028&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java Mon Jan 23 22:15:15 2012
@@ -85,11 +85,11 @@ public class Lucene40FieldInfosReader ex
         // LUCENE-3027: past indices were able to write
         // storePayloads=true when omitTFAP is also true,
         // which is invalid.  We correct that, here:
-        if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+        if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
           storePayloads = false;
         }
         hasVectors |= storeTermVector;
-        hasProx |= isIndexed && indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+        hasProx |= isIndexed && indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
         hasFreq |= isIndexed && indexOptions != IndexOptions.DOCS_ONLY;
         // DV Types are packed in one byte
         byte val = input.readByte();

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java?rev=1235028&r1=1235027&r2=1235028&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java Mon Jan 23 22:15:15 2012
@@ -58,7 +58,7 @@ public class Lucene40FieldInfosWriter ex
       output.writeVInt(FORMAT_CURRENT);
       output.writeVInt(infos.size());
       for (FieldInfo fi : infos) {
-        assert fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.storePayloads;
+        assert fi.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.storePayloads;
         byte bits = 0x0;
         if (fi.isIndexed) bits |= IS_INDEXED;
         if (fi.storeTermVector) bits |= STORE_TERMVECTOR;

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java?rev=1235028&r1=1235027&r2=1235028&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java Mon Jan 23 22:15:15 2012
@@ -197,7 +197,7 @@ public class Lucene40PostingsReader exte
       // undefined
     }
 
-    if (fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+    if (fieldInfo.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
       if (isFirstTerm) {
         termState.proxOffset = termState.bytesReader.readVLong();
       } else {
@@ -245,23 +245,23 @@ public class Lucene40PostingsReader exte
                                                DocsAndPositionsEnum reuse, boolean needsOffsets)
     throws IOException {
 
-    if (needsOffsets) {
-      // TODO: once we index offsets into postings fix this!
-      return null;
+    boolean hasOffsets = fieldInfo.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+    if (needsOffsets && !hasOffsets) {
+      return null; // not available
     }
 
     // TODO: refactor
-    if (fieldInfo.storePayloads) {
-      SegmentDocsAndPositionsAndPayloadsEnum docsEnum;
-      if (reuse == null || !(reuse instanceof SegmentDocsAndPositionsAndPayloadsEnum)) {
-        docsEnum = new SegmentDocsAndPositionsAndPayloadsEnum(freqIn, proxIn);
+    if (fieldInfo.storePayloads || hasOffsets) {
+      SegmentFullPositionsEnum docsEnum;
+      if (reuse == null || !(reuse instanceof SegmentFullPositionsEnum)) {
+        docsEnum = new SegmentFullPositionsEnum(freqIn, proxIn);
       } else {
-        docsEnum = (SegmentDocsAndPositionsAndPayloadsEnum) reuse;
+        docsEnum = (SegmentFullPositionsEnum) reuse;
         if (docsEnum.startFreqIn != freqIn) {
           // If you are using ParellelReader, and pass in a
           // reused DocsEnum, it could have come from another
           // reader also using standard codec
-          docsEnum = new SegmentDocsAndPositionsAndPayloadsEnum(freqIn, proxIn);
+          docsEnum = new SegmentFullPositionsEnum(freqIn, proxIn);
         }
       }
       return docsEnum.reset(fieldInfo, (StandardTermState) termState, liveDocs);
@@ -295,6 +295,7 @@ public class Lucene40PostingsReader exte
     
     protected boolean indexOmitsTF;                               // does current field omit term freq?
     protected boolean storePayloads;                        // does current field store payloads?
+    protected boolean storeOffsets;                         // does current field store offsets?
 
     protected int limit;                                    // number of docs in this posting
     protected int ord;                                      // how many docs we've read
@@ -324,6 +325,7 @@ public class Lucene40PostingsReader exte
     DocsEnum reset(FieldInfo fieldInfo, StandardTermState termState) throws IOException {
       indexOmitsTF = fieldInfo.indexOptions == IndexOptions.DOCS_ONLY;
       storePayloads = fieldInfo.storePayloads;
+      storeOffsets = fieldInfo.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
       freqOffset = termState.freqOffset;
       skipOffset = termState.skipOffset;
 
@@ -471,7 +473,7 @@ public class Lucene40PostingsReader exte
 
           skipper.init(freqOffset + skipOffset,
                        freqOffset, 0,
-                       limit, storePayloads);
+                       limit, storePayloads, storeOffsets);
 
           skipped = true;
         }
@@ -665,7 +667,7 @@ public class Lucene40PostingsReader exte
   
   // TODO specialize DocsAndPosEnum too
   
-  // Decodes docs & positions. payloads are not present.
+  // Decodes docs & positions. payloads nor offsets are present.
   private final class SegmentDocsAndPositionsEnum extends DocsAndPositionsEnum {
     final IndexInput startFreqIn;
     private final IndexInput freqIn;
@@ -792,7 +794,7 @@ public class Lucene40PostingsReader exte
 
           skipper.init(freqOffset+skipOffset,
                        freqOffset, proxOffset,
-                       limit, false);
+                       limit, false, false);
 
           skipped = true;
         }
@@ -868,8 +870,8 @@ public class Lucene40PostingsReader exte
     }
   }
   
-  // Decodes docs & positions & payloads
-  private class SegmentDocsAndPositionsAndPayloadsEnum extends DocsAndPositionsEnum {
+  // Decodes docs & positions & (payloads and/or offsets)
+  private class SegmentFullPositionsEnum extends DocsAndPositionsEnum {
     final IndexInput startFreqIn;
     private final IndexInput freqIn;
     private final IndexInput proxIn;
@@ -895,16 +897,24 @@ public class Lucene40PostingsReader exte
     Lucene40SkipListReader skipper;
     private BytesRef payload;
     private long lazyProxPointer;
+    
+    boolean storePayloads;
+    boolean storeOffsets;
+    
+    int offsetLength;
+    int startOffset;
 
-    public SegmentDocsAndPositionsAndPayloadsEnum(IndexInput freqIn, IndexInput proxIn) throws IOException {
+    public SegmentFullPositionsEnum(IndexInput freqIn, IndexInput proxIn) throws IOException {
       startFreqIn = freqIn;
       this.freqIn = (IndexInput) freqIn.clone();
       this.proxIn = (IndexInput) proxIn.clone();
     }
 
-    public SegmentDocsAndPositionsAndPayloadsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
-      assert fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
-      assert fieldInfo.storePayloads;
+    public SegmentFullPositionsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
+      storeOffsets = fieldInfo.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+      storePayloads = fieldInfo.storePayloads;
+      assert fieldInfo.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+      assert storePayloads || storeOffsets;
       if (payload == null) {
         payload = new BytesRef();
         payload.bytes = new byte[1];
@@ -923,6 +933,7 @@ public class Lucene40PostingsReader exte
       doc = -1;
       accum = 0;
       position = 0;
+      startOffset = 0;
 
       skipped = false;
       posPendingCount = 0;
@@ -963,6 +974,7 @@ public class Lucene40PostingsReader exte
       }
 
       position = 0;
+      startOffset = 0;
 
       //System.out.println("StandardR.D&PE nextDoc seg=" + segment + " return doc=" + doc);
       return (doc = accum);
@@ -1001,7 +1013,7 @@ public class Lucene40PostingsReader exte
           //System.out.println("  init skipper freqOffset=" + freqOffset + " skipOffset=" + skipOffset + " vs len=" + freqIn.length());
           skipper.init(freqOffset+skipOffset,
                        freqOffset, proxOffset,
-                       limit, true);
+                       limit, storePayloads, storeOffsets);
 
           skipped = true;
         }
@@ -1016,8 +1028,10 @@ public class Lucene40PostingsReader exte
           lazyProxPointer = skipper.getProxPointer();
           posPendingCount = 0;
           position = 0;
+          startOffset = 0;
           payloadPending = false;
           payloadLength = skipper.getPayloadLength();
+          offsetLength = skipper.getOffsetLength();
         }
       }
         
@@ -1038,27 +1052,38 @@ public class Lucene40PostingsReader exte
       }
       
       if (payloadPending && payloadLength > 0) {
-        // payload of last position as never retrieved -- skip it
+        // payload of last position was never retrieved -- skip it
         proxIn.seek(proxIn.getFilePointer() + payloadLength);
         payloadPending = false;
       }
 
       // scan over any docs that were iterated without their positions
       while(posPendingCount > freq) {
-
         final int code = proxIn.readVInt();
 
-        if ((code & 1) != 0) {
-          // new payload length
-          payloadLength = proxIn.readVInt();
-          assert payloadLength >= 0;
+        if (storePayloads) {
+          if ((code & 1) != 0) {
+            // new payload length
+            payloadLength = proxIn.readVInt();
+            assert payloadLength >= 0;
+          }
+          assert payloadLength != -1;
         }
         
-        assert payloadLength != -1;
-        proxIn.seek(proxIn.getFilePointer() + payloadLength);
+        if (storeOffsets) {
+          if ((proxIn.readVInt() & 1) != 0) {
+            // new offset length
+            offsetLength = proxIn.readVInt();
+          }
+        }
+        
+        if (storePayloads) {
+          proxIn.seek(proxIn.getFilePointer() + payloadLength);
+        }
 
         posPendingCount--;
         position = 0;
+        startOffset = 0;
         payloadPending = false;
         //System.out.println("StandardR.D&PE skipPos");
       }
@@ -1069,16 +1094,28 @@ public class Lucene40PostingsReader exte
         proxIn.seek(proxIn.getFilePointer()+payloadLength);
       }
 
-      final int code = proxIn.readVInt();
-      if ((code & 1) != 0) {
-        // new payload length
-        payloadLength = proxIn.readVInt();
-        assert payloadLength >= 0;
-      }
-      assert payloadLength != -1;
+      int code = proxIn.readVInt();
+      if (storePayloads) {
+        if ((code & 1) != 0) {
+          // new payload length
+          payloadLength = proxIn.readVInt();
+          assert payloadLength >= 0;
+        }
+        assert payloadLength != -1;
           
-      payloadPending = true;
-      position += code >>> 1;
+        payloadPending = true;
+        code >>>= 1;
+      }
+      position += code;
+      
+      if (storeOffsets) {
+        int offsetCode = proxIn.readVInt();
+        if ((offsetCode & 1) != 0) {
+          // new offset length
+          offsetLength = proxIn.readVInt();
+        }
+        startOffset += offsetCode >>> 1;
+      }
 
       posPendingCount--;
 
@@ -1090,32 +1127,36 @@ public class Lucene40PostingsReader exte
 
     @Override
     public int startOffset() throws IOException {
-      return -1;
+      return storeOffsets ? startOffset : -1;
     }
 
     @Override
     public int endOffset() throws IOException {
-      return -1;
+      return storeOffsets ? startOffset + offsetLength : -1;
     }
 
     /** Returns the payload at this position, or null if no
      *  payload was indexed. */
     @Override
     public BytesRef getPayload() throws IOException {
-      assert lazyProxPointer == -1;
-      assert posPendingCount < freq;
-      if (!payloadPending) {
-        throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
-      }
-      if (payloadLength > payload.bytes.length) {
-        payload.grow(payloadLength);
-      }
+      if (storePayloads) {
+        assert lazyProxPointer == -1;
+        assert posPendingCount < freq;
+        if (!payloadPending) {
+          throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
+        }
+        if (payloadLength > payload.bytes.length) {
+          payload.grow(payloadLength);
+        }
 
-      proxIn.readBytes(payload.bytes, 0, payloadLength);
-      payload.length = payloadLength;
-      payloadPending = false;
+        proxIn.readBytes(payload.bytes, 0, payloadLength);
+        payload.length = payloadLength;
+        payloadPending = false;
 
-      return payload;
+        return payload;
+      } else {
+        throw new IOException("No payloads exist for this field!");
+      }
     }
 
     @Override

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java?rev=1235028&r1=1235027&r2=1235028&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java Mon Jan 23 22:15:15 2012
@@ -73,12 +73,15 @@ public final class Lucene40PostingsWrite
 
   IndexOptions indexOptions;
   boolean storePayloads;
+  boolean storeOffsets;
   // Starts a new term
   long freqStart;
   long proxStart;
   FieldInfo fieldInfo;
   int lastPayloadLength;
+  int lastOffsetLength;
   int lastPosition;
+  int lastOffset;
 
   // private String segment;
 
@@ -137,6 +140,8 @@ public final class Lucene40PostingsWrite
       proxStart = proxOut.getFilePointer();
       // force first payload to write its length
       lastPayloadLength = -1;
+      // force first offset to write its length
+      lastOffsetLength = -1;
     }
     skipListWriter.resetSkip();
   }
@@ -155,10 +160,8 @@ public final class Lucene40PostingsWrite
     */
     this.fieldInfo = fieldInfo;
     indexOptions = fieldInfo.indexOptions;
-    if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
-      throw new UnsupportedOperationException("this codec cannot index offsets");
-    }
-        
+    
+    storeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;        
     storePayloads = fieldInfo.storePayloads;
     //System.out.println("  set init blockFreqStart=" + freqStart);
     //System.out.println("  set init blockProxStart=" + proxStart);
@@ -180,7 +183,7 @@ public final class Lucene40PostingsWrite
     }
 
     if ((++df % skipInterval) == 0) {
-      skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength);
+      skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength, storeOffsets, lastOffsetLength);
       skipListWriter.bufferSkip(df);
     }
 
@@ -197,31 +200,26 @@ public final class Lucene40PostingsWrite
     }
 
     lastPosition = 0;
+    lastOffset = 0;
   }
 
   /** Add a new position & payload */
   @Override
   public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
     //if (DEBUG) System.out.println("SPW:     addPos pos=" + position + " payload=" + (payload == null ? "null" : (payload.length + " bytes")) + " proxFP=" + proxOut.getFilePointer());
-    assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS: "invalid indexOptions: " + indexOptions;
+    assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 : "invalid indexOptions: " + indexOptions;
     assert proxOut != null;
 
-    // TODO: when we add offsets... often
-    // endOffset-startOffset will be constant or near
-    // constant for all docs (eg if the term wasn't stemmed
-    // then this will usually be the utf16 length of the
-    // term); would be nice to write that length once up
-    // front and then not encode endOffset for each
-    // position..
-
     final int delta = position - lastPosition;
     
     assert delta >= 0: "position=" + position + " lastPosition=" + lastPosition;            // not quite right (if pos=0 is repeated twice we don't catch it)
 
     lastPosition = position;
 
+    int payloadLength = 0;
+
     if (storePayloads) {
-      final int payloadLength = payload == null ? 0 : payload.length;
+      payloadLength = payload == null ? 0 : payload.length;
 
       if (payloadLength != lastPayloadLength) {
         lastPayloadLength = payloadLength;
@@ -230,13 +228,28 @@ public final class Lucene40PostingsWrite
       } else {
         proxOut.writeVInt(delta << 1);
       }
-
-      if (payloadLength > 0) {
-        proxOut.writeBytes(payload.bytes, payload.offset, payloadLength);
-      }
     } else {
       proxOut.writeVInt(delta);
     }
+    
+    if (storeOffsets) {
+      // don't use startOffset - lastEndOffset, because this creates lots of negative vints for synonyms,
+      // and the numbers aren't that much smaller anyways.
+      int offsetDelta = startOffset - lastOffset;
+      int offsetLength = endOffset - startOffset;
+      if (offsetLength != lastOffsetLength) {
+        proxOut.writeVInt(offsetDelta << 1 | 1);
+        proxOut.writeVInt(offsetLength);
+      } else {
+        proxOut.writeVInt(offsetDelta << 1);
+      }
+      lastOffset = startOffset;
+      lastOffsetLength = offsetLength;
+    }
+    
+    if (payloadLength > 0) {
+      proxOut.writeBytes(payload.bytes, payload.offset, payloadLength);
+    }
   }
 
   @Override
@@ -304,7 +317,7 @@ public final class Lucene40PostingsWrite
       assert firstTerm.skipOffset > 0;
       bytesWriter.writeVInt(firstTerm.skipOffset);
     }
-    if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+    if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
       bytesWriter.writeVLong(firstTerm.proxStart);
     }
     long lastFreqStart = firstTerm.freqStart;
@@ -319,7 +332,7 @@ public final class Lucene40PostingsWrite
         assert term.skipOffset > 0;
         bytesWriter.writeVInt(term.skipOffset);
       }
-      if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
+      if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
         bytesWriter.writeVLong(term.proxStart - lastProxStart);
         lastProxStart = term.proxStart;
       }

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java?rev=1235028&r1=1235027&r2=1235028&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java Mon Jan 23 22:15:15 2012
@@ -30,13 +30,16 @@ import org.apache.lucene.store.IndexInpu
  */
 public class Lucene40SkipListReader extends MultiLevelSkipListReader {
   private boolean currentFieldStoresPayloads;
+  private boolean currentFieldStoresOffsets;
   private long freqPointer[];
   private long proxPointer[];
   private int payloadLength[];
+  private int offsetLength[];
   
   private long lastFreqPointer;
   private long lastProxPointer;
   private int lastPayloadLength;
+  private int lastOffsetLength;
                            
 
   public Lucene40SkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval) {
@@ -44,17 +47,20 @@ public class Lucene40SkipListReader exte
     freqPointer = new long[maxSkipLevels];
     proxPointer = new long[maxSkipLevels];
     payloadLength = new int[maxSkipLevels];
+    offsetLength = new int[maxSkipLevels];
   }
 
-  public void init(long skipPointer, long freqBasePointer, long proxBasePointer, int df, boolean storesPayloads) {
+  public void init(long skipPointer, long freqBasePointer, long proxBasePointer, int df, boolean storesPayloads, boolean storesOffsets) {
     super.init(skipPointer, df);
     this.currentFieldStoresPayloads = storesPayloads;
+    this.currentFieldStoresOffsets = storesOffsets;
     lastFreqPointer = freqBasePointer;
     lastProxPointer = proxBasePointer;
 
     Arrays.fill(freqPointer, freqBasePointer);
     Arrays.fill(proxPointer, proxBasePointer);
     Arrays.fill(payloadLength, 0);
+    Arrays.fill(offsetLength, 0);
   }
 
   /** Returns the freq pointer of the doc to which the last call of 
@@ -76,12 +82,20 @@ public class Lucene40SkipListReader exte
     return lastPayloadLength;
   }
   
+  /** Returns the offset length (endOffset-startOffset) of the position stored just before 
+   * the doc to which the last call of {@link MultiLevelSkipListReader#skipTo(int)} 
+   * has skipped.  */
+  public int getOffsetLength() {
+    return lastOffsetLength;
+  }
+  
   @Override
   protected void seekChild(int level) throws IOException {
     super.seekChild(level);
     freqPointer[level] = lastFreqPointer;
     proxPointer[level] = lastProxPointer;
     payloadLength[level] = lastPayloadLength;
+    offsetLength[level] = lastOffsetLength;
   }
   
   @Override
@@ -90,6 +104,7 @@ public class Lucene40SkipListReader exte
     lastFreqPointer = freqPointer[level];
     lastProxPointer = proxPointer[level];
     lastPayloadLength = payloadLength[level];
+    lastOffsetLength = offsetLength[level];
   }
 
 
@@ -110,6 +125,11 @@ public class Lucene40SkipListReader exte
     } else {
       delta = skipStream.readVInt();
     }
+
+    if (currentFieldStoresOffsets) {
+      offsetLength[level] = skipStream.readVInt();
+    }
+
     freqPointer[level] += skipStream.readVInt();
     proxPointer[level] += skipStream.readVInt();
     

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java?rev=1235028&r1=1235027&r2=1235028&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java Mon Jan 23 22:15:15 2012
@@ -40,7 +40,9 @@ public class Lucene40SkipListWriter exte
 
   private int curDoc;
   private boolean curStorePayloads;
+  private boolean curStoreOffsets;
   private int curPayloadLength;
+  private int curOffsetLength;
   private long curFreqPointer;
   private long curProxPointer;
 
@@ -58,10 +60,12 @@ public class Lucene40SkipListWriter exte
   /**
    * Sets the values for the current skip data. 
    */
-  public void setSkipData(int doc, boolean storePayloads, int payloadLength) {
+  public void setSkipData(int doc, boolean storePayloads, int payloadLength, boolean storeOffsets, int offsetLength) {
     this.curDoc = doc;
     this.curStorePayloads = storePayloads;
     this.curPayloadLength = payloadLength;
+    this.curStoreOffsets = storeOffsets;
+    this.curOffsetLength = offsetLength;
     this.curFreqPointer = freqOutput.getFilePointer();
     if (proxOutput != null)
       this.curProxPointer = proxOutput.getFilePointer();
@@ -116,6 +120,12 @@ public class Lucene40SkipListWriter exte
       // current field does not store payloads
       skipBuffer.writeVInt(curDoc - lastSkipDoc[level]);
     }
+
+    // TODO: not sure it really helps to shove this somewhere else if its the same as the last skip
+    if (curStoreOffsets) {
+      skipBuffer.writeVInt(curOffsetLength);
+    }
+
     skipBuffer.writeVInt((int) (curFreqPointer - lastSkipFreqPointer[level]));
     skipBuffer.writeVInt((int) (curProxPointer - lastSkipProxPointer[level]));
 

Modified: lucene/dev/branches/lucene2858/lucene/src/test-framework/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/test-framework/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java?rev=1235028&r1=1235027&r2=1235028&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/test-framework/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/test-framework/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java Mon Jan 23 22:15:15 2012
@@ -17,6 +17,7 @@ package org.apache.lucene.analysis;
  * limitations under the License.
  */
 
+import java.io.Reader;
 import java.io.StringReader;
 import java.io.IOException;
 import java.util.ArrayList;
@@ -289,8 +290,12 @@ public abstract class BaseTokenStreamTes
       }
     }
   };
-
+  
   public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength) throws IOException {
+    checkRandomData(random, a, iterations, maxWordLength, random.nextBoolean());
+  }
+
+  public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean useCharFilter) throws IOException {
     for (int i = 0; i < iterations; i++) {
       String text;
       switch(_TestUtil.nextInt(random, 0, 4)) {
@@ -311,7 +316,9 @@ public abstract class BaseTokenStreamTes
         System.out.println("NOTE: BaseTokenStreamTestCase: get first token stream now text=" + text);
       }
 
-      TokenStream ts = a.tokenStream("dummy", new StringReader(text));
+      int remainder = random.nextInt(10);
+      Reader reader = new StringReader(text);
+      TokenStream ts = a.tokenStream("dummy", useCharFilter ? new MockCharFilter(reader, remainder) : reader);
       assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class));
       CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
       OffsetAttribute offsetAtt = ts.hasAttribute(OffsetAttribute.class) ? ts.getAttribute(OffsetAttribute.class) : null;
@@ -339,30 +346,38 @@ public abstract class BaseTokenStreamTes
         if (VERBOSE) {
           System.out.println("NOTE: BaseTokenStreamTestCase: re-run analysis");
         }
+        reader = new StringReader(text);
+        ts = a.tokenStream("dummy", useCharFilter ? new MockCharFilter(reader, remainder) : reader);
         if (typeAtt != null && posIncAtt != null && offsetAtt != null) {
           // offset + pos + type
-          assertAnalyzesToReuse(a, text, 
+          assertTokenStreamContents(ts, 
             tokens.toArray(new String[tokens.size()]),
             toIntArray(startOffsets),
             toIntArray(endOffsets),
             types.toArray(new String[types.size()]),
-            toIntArray(positions));
+            toIntArray(positions),
+            text.length());
         } else if (posIncAtt != null && offsetAtt != null) {
           // offset + pos
-          assertAnalyzesToReuse(a, text, 
+          assertTokenStreamContents(ts, 
               tokens.toArray(new String[tokens.size()]),
               toIntArray(startOffsets),
               toIntArray(endOffsets),
-              toIntArray(positions));
+              null,
+              toIntArray(positions),
+              text.length());
         } else if (offsetAtt != null) {
           // offset
-          assertAnalyzesToReuse(a, text, 
+          assertTokenStreamContents(ts, 
               tokens.toArray(new String[tokens.size()]),
               toIntArray(startOffsets),
-              toIntArray(endOffsets));
+              toIntArray(endOffsets),
+              null,
+              null,
+              text.length());
         } else {
           // terms only
-          assertAnalyzesToReuse(a, text, 
+          assertTokenStreamContents(ts, 
               tokens.toArray(new String[tokens.size()]));
         }
       }

Modified: lucene/dev/branches/lucene2858/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWFieldsWriter.java?rev=1235028&r1=1235027&r2=1235028&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWFieldsWriter.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWFieldsWriter.java Mon Jan 23 22:15:15 2012
@@ -137,7 +137,7 @@ class PreFlexRWFieldsWriter extends Fiel
         }
 
         if ((++df % termsOut.skipInterval) == 0) {
-          skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength);
+          skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength, false, 0);
           skipListWriter.bufferSkip(df);
         }
 

Modified: lucene/dev/branches/lucene2858/lucene/src/test/org/apache/lucene/index/TestPostingsOffsets.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/test/org/apache/lucene/index/TestPostingsOffsets.java?rev=1235028&r1=1235027&r2=1235028&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/test/org/apache/lucene/index/TestPostingsOffsets.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/test/org/apache/lucene/index/TestPostingsOffsets.java Mon Jan 23 22:15:15 2012
@@ -22,29 +22,46 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CannedAnalyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockPayloadAnalyzer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.NumericField;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.FieldCache;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.English;
 import org.apache.lucene.util.LuceneTestCase;
-import org.junit.Assume;
+import org.apache.lucene.util._TestUtil;
 
 public class TestPostingsOffsets extends LuceneTestCase {
+  IndexWriterConfig iwc;
+  
+  public void setUp() throws Exception {
+    super.setUp();
+    // Currently only SimpleText and Lucene40 can index offsets into postings:
+    assumeTrue("codec does not support offsets", Codec.getDefault().getName().equals("SimpleText") || Codec.getDefault().getName().equals("Lucene40"));
+    iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
+    
+    if (Codec.getDefault().getName().equals("Lucene40")) {
+      // pulsing etc are not implemented
+      iwc.setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()));
+    }
+  }
 
   public void testBasic() throws Exception {
-
-    // Currently only SimpleText can index offsets into postings:
-    Assume.assumeTrue(Codec.getDefault().getName().equals("SimpleText"));
-
     Directory dir = newDirectory();
-    RandomIndexWriter w = new RandomIndexWriter(random, dir);
+    
+    RandomIndexWriter w = new RandomIndexWriter(random, dir, iwc);
     Document doc = new Document();
 
     FieldType ft = new FieldType(TextField.TYPE_UNSTORED);
@@ -94,16 +111,117 @@ public class TestPostingsOffsets extends
     r.close();
     dir.close();
   }
+  
+  public void testSkipping() throws Exception {
+    doTestNumbers(false);
+  }
+  
+  public void testPayloads() throws Exception {
+    doTestNumbers(true);
+  }
+  
+  public void doTestNumbers(boolean withPayloads) throws Exception {
+    Directory dir = newDirectory();
+    Analyzer analyzer = withPayloads ? new MockPayloadAnalyzer() : new MockAnalyzer(random);
+    iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+    if (Codec.getDefault().getName().equals("Lucene40")) {
+      // pulsing etc are not implemented
+      iwc.setCodec(_TestUtil.alwaysPostingsFormat(new Lucene40PostingsFormat()));
+    }
+    iwc.setMergePolicy(newLogMergePolicy()); // will rely on docids a bit for skipping
+    RandomIndexWriter w = new RandomIndexWriter(random, dir, iwc);
+    
+    FieldType ft = new FieldType(TextField.TYPE_STORED);
+    ft.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    if (random.nextBoolean()) {
+      ft.setStoreTermVectors(true);
+      ft.setStoreTermVectorOffsets(random.nextBoolean());
+      ft.setStoreTermVectorPositions(random.nextBoolean());
+    }
+    
+    int numDocs = atLeast(500);
+    for (int i = 0; i < numDocs; i++) {
+      Document doc = new Document();
+      doc.add(new Field("numbers", English.intToEnglish(i), ft));
+      doc.add(new Field("oddeven", (i % 2) == 0 ? "even" : "odd", ft));
+      doc.add(new StringField("id", "" + i));
+      w.addDocument(doc);
+    }
+    
+    IndexReader reader = w.getReader();
+    w.close();
+    
+    String terms[] = { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "hundred" };
+    
+    for (String term : terms) {
+      DocsAndPositionsEnum dp = MultiFields.getTermPositionsEnum(reader, null, "numbers", new BytesRef(term), true);
+      int doc;
+      while((doc = dp.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
+        String storedNumbers = reader.document(doc).get("numbers");
+        int freq = dp.freq();
+        for (int i = 0; i < freq; i++) {
+          dp.nextPosition();
+          int start = dp.startOffset();
+          assert start >= 0;
+          int end = dp.endOffset();
+          assert end >= 0 && end >= start;
+          // check that the offsets correspond to the term in the src text
+          assertTrue(storedNumbers.substring(start, end).equals(term));
+          if (withPayloads) {
+            // check that we have a payload and it starts with "pos"
+            assertTrue(dp.hasPayload());
+            BytesRef payload = dp.getPayload();
+            assertTrue(payload.utf8ToString().startsWith("pos:"));
+          } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer!
+        }
+      }
+    }
+    
+    // check we can skip correctly
+    int numSkippingTests = atLeast(50);
+    
+    for (int j = 0; j < numSkippingTests; j++) {
+      int num = _TestUtil.nextInt(random, 100, Math.min(numDocs-1, 999));
+      DocsAndPositionsEnum dp = MultiFields.getTermPositionsEnum(reader, null, "numbers", new BytesRef("hundred"), true);
+      int doc = dp.advance(num);
+      assertEquals(num, doc);
+      int freq = dp.freq();
+      for (int i = 0; i < freq; i++) {
+        String storedNumbers = reader.document(doc).get("numbers");
+        dp.nextPosition();
+        int start = dp.startOffset();
+        assert start >= 0;
+        int end = dp.endOffset();
+        assert end >= 0 && end >= start;
+        // check that the offsets correspond to the term in the src text
+        assertTrue(storedNumbers.substring(start, end).equals("hundred"));
+        if (withPayloads) {
+          // check that we have a payload and it starts with "pos"
+          assertTrue(dp.hasPayload());
+          BytesRef payload = dp.getPayload();
+          assertTrue(payload.utf8ToString().startsWith("pos:"));
+        } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer!
+      }
+    }
+    
+    // check that other fields (without offsets) work correctly
+    
+    for (int i = 0; i < numDocs; i++) {
+      DocsEnum dp = MultiFields.getTermDocsEnum(reader, null, "id", new BytesRef("" + i), false);
+      assertEquals(i, dp.nextDoc());
+      assertEquals(DocIdSetIterator.NO_MORE_DOCS, dp.nextDoc());
+    }
+    
+    reader.close();
+    dir.close();
+  }
 
   public void testRandom() throws Exception {
-    // Currently only SimpleText can index offsets into postings:
-    Assume.assumeTrue(Codec.getDefault().getName().equals("SimpleText"));
-
     // token -> docID -> tokens
     final Map<String,Map<Integer,List<Token>>> actualTokens = new HashMap<String,Map<Integer,List<Token>>>();
 
     Directory dir = newDirectory();
-    RandomIndexWriter w = new RandomIndexWriter(random, dir);
+    RandomIndexWriter w = new RandomIndexWriter(random, dir, iwc);
 
     final int numDocs = atLeast(20);
     //final int numDocs = atLeast(5);