You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2023/03/15 10:56:51 UTC
[lucene] branch main updated: Use radix sort to sort postings when index sorting is enabled. (#12114)

This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 805eb0b613e Use radix sort to sort postings when index sorting is enabled. (#12114)
805eb0b613e is described below

commit 805eb0b613e9711994da3ccafff8fca3a36ed382
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Wed Mar 15 11:56:45 2023 +0100

    Use radix sort to sort postings when index sorting is enabled. (#12114)
    
    This switches from TimSorter to LSBRadixSorter for sorting postings whose
    index options are `DOCS`. On a synthetic benchmark, this made barely any
    difference when the index order is the same as the sort order or its
    reverse, but gave an almost 3x speedup for writing postings when the index
    order is mostly random.
---
 .../apache/lucene/index/FreqProxTermsWriter.java   | 257 ++++++--------------
 .../org/apache/lucene/index/TestIndexSorting.java  | 268 +++++++++++++++++++++
 2 files changed, 341 insertions(+), 184 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
index ce5de0f3974..5ba0df2bed0 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
@@ -35,9 +35,11 @@ import org.apache.lucene.util.Counter;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.IntBlockPool;
 import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.LSBRadixSorter;
 import org.apache.lucene.util.LongsRef;
 import org.apache.lucene.util.TimSorter;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.packed.PackedInts;
 
 final class FreqProxTermsWriter extends TermsHash {
 
@@ -153,13 +155,12 @@ final class FreqProxTermsWriter extends TermsHash {
 
     @Override
     public TermsEnum iterator() throws IOException {
-      return new SortingTermsEnum(in.iterator(), docMap, indexOptions, hasPositions());
+      return new SortingTermsEnum(in.iterator(), docMap, indexOptions);
     }
 
     @Override
     public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
-      return new SortingTermsEnum(
-          in.intersect(compiled, startTerm), docMap, indexOptions, hasPositions());
+      return new SortingTermsEnum(in.intersect(compiled, startTerm), docMap, indexOptions);
     }
   }
 
@@ -167,20 +168,18 @@ final class FreqProxTermsWriter extends TermsHash {
 
     final Sorter.DocMap docMap; // pkg-protected to avoid synthetic accessor methods
     private final IndexOptions indexOptions;
-    private final boolean hasPositions;
 
-    SortingTermsEnum(
-        final TermsEnum in, Sorter.DocMap docMap, IndexOptions indexOptions, boolean hasPositions) {
+    SortingTermsEnum(final TermsEnum in, Sorter.DocMap docMap, IndexOptions indexOptions) {
       super(in);
       this.docMap = docMap;
       this.indexOptions = indexOptions;
-      this.hasPositions = hasPositions;
     }
 
     @Override
     public PostingsEnum postings(PostingsEnum reuse, final int flags) throws IOException {
 
-      if (hasPositions && PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS)) {
+      if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0
+          && PostingsEnum.featureRequested(flags, PostingsEnum.FREQS)) {
         final PostingsEnum inReuse;
         final SortingPostingsEnum wrapReuse;
         if (reuse != null && reuse instanceof SortingPostingsEnum) {
@@ -194,14 +193,16 @@ final class FreqProxTermsWriter extends TermsHash {
         }
 
         final PostingsEnum inDocsAndPositions = in.postings(inReuse, flags);
-        // we ignore the fact that offsets may be stored but not asked for,
+        // we ignore the fact that positions/offsets may be stored but not asked for,
         // since this code is expected to be used during addIndexes which will
         // ask for everything. if that assumption changes in the future, we can
         // factor in whether 'flags' says offsets are not required.
+        final boolean storePositions =
+            indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
         final boolean storeOffsets =
             indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
         return new SortingPostingsEnum(
-            docMap.size(), wrapReuse, inDocsAndPositions, docMap, storeOffsets);
+            docMap.size(), wrapReuse, inDocsAndPositions, docMap, storePositions, storeOffsets);
       }
 
       final PostingsEnum inReuse;
@@ -213,161 +214,53 @@ final class FreqProxTermsWriter extends TermsHash {
         inReuse = wrapReuse.getWrapped();
       } else {
         wrapReuse = null;
-        inReuse = reuse;
+        inReuse = null;
       }
 
       final PostingsEnum inDocs = in.postings(inReuse, flags);
-      final boolean withFreqs =
-          indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0
-              && PostingsEnum.featureRequested(flags, PostingsEnum.FREQS);
-      return new SortingDocsEnum(docMap.size(), wrapReuse, inDocs, withFreqs, docMap);
+      return new SortingDocsEnum(docMap.size(), wrapReuse, inDocs, docMap);
     }
   }
 
-  static class SortingDocsEnum extends FilterLeafReader.FilterPostingsEnum {
+  static class SortingDocsEnum extends PostingsEnum {
 
-    private static final class DocFreqSorter extends TimSorter {
-
-      private int[] docs;
-      private int[] freqs;
-      private int[] tmpDocs;
-      private int[] tmpFreqs;
-
-      DocFreqSorter(int maxDoc) {
-        super(maxDoc / 8);
-        this.tmpDocs = IntsRef.EMPTY_INTS;
-      }
-
-      public void reset(int[] docs, int[] freqs) {
-        this.docs = docs;
-        this.freqs = freqs;
-        if (freqs != null && tmpFreqs == null) {
-          tmpFreqs = new int[tmpDocs.length];
-        }
-      }
-
-      @Override
-      protected int compare(int i, int j) {
-        return docs[i] - docs[j];
-      }
-
-      @Override
-      protected void swap(int i, int j) {
-        int tmpDoc = docs[i];
-        docs[i] = docs[j];
-        docs[j] = tmpDoc;
-
-        if (freqs != null) {
-          int tmpFreq = freqs[i];
-          freqs[i] = freqs[j];
-          freqs[j] = tmpFreq;
-        }
-      }
-
-      @Override
-      protected void copy(int src, int dest) {
-        docs[dest] = docs[src];
-        if (freqs != null) {
-          freqs[dest] = freqs[src];
-        }
-      }
-
-      @Override
-      protected void save(int i, int len) {
-        if (tmpDocs.length < len) {
-          tmpDocs = new int[ArrayUtil.oversize(len, Integer.BYTES)];
-          if (freqs != null) {
-            tmpFreqs = new int[tmpDocs.length];
-          }
-        }
-        System.arraycopy(docs, i, tmpDocs, 0, len);
-        if (freqs != null) {
-          System.arraycopy(freqs, i, tmpFreqs, 0, len);
-        }
-      }
-
-      @Override
-      protected void restore(int i, int j) {
-        docs[j] = tmpDocs[i];
-        if (freqs != null) {
-          freqs[j] = tmpFreqs[i];
-        }
-      }
-
-      @Override
-      protected int compareSaved(int i, int j) {
-        return tmpDocs[i] - docs[j];
-      }
-    }
-
-    private final int maxDoc;
-    private final DocFreqSorter sorter;
+    private final PostingsEnum in;
+    private final LSBRadixSorter sorter;
     private int[] docs;
-    private int[] freqs;
     private int docIt = -1;
-    private final int upto;
-    private final boolean withFreqs;
+    private final int upTo;
 
     SortingDocsEnum(
-        int maxDoc,
-        SortingDocsEnum reuse,
-        final PostingsEnum in,
-        boolean withFreqs,
-        final Sorter.DocMap docMap)
+        int maxDoc, SortingDocsEnum reuse, final PostingsEnum in, final Sorter.DocMap docMap)
         throws IOException {
-      super(in);
-      this.maxDoc = maxDoc;
-      this.withFreqs = withFreqs;
       if (reuse != null) {
-        if (reuse.maxDoc == maxDoc) {
-          sorter = reuse.sorter;
-        } else {
-          sorter = new DocFreqSorter(maxDoc);
-        }
+        sorter = reuse.sorter;
         docs = reuse.docs;
-        freqs = reuse.freqs; // maybe null
       } else {
-        docs = new int[64];
-        sorter = new DocFreqSorter(maxDoc);
+        sorter = new LSBRadixSorter();
+        docs = IntsRef.EMPTY_INTS;
       }
-      docIt = -1;
+      this.in = in;
       int i = 0;
-      int doc;
-      if (withFreqs) {
-        if (freqs == null || freqs.length < docs.length) {
-          freqs = new int[docs.length];
-        }
-        while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-          if (i >= docs.length) {
-            docs = ArrayUtil.grow(docs, docs.length + 1);
-            freqs = ArrayUtil.grow(freqs, freqs.length + 1);
-          }
-          docs[i] = docMap.oldToNew(doc);
-          freqs[i] = in.freq();
-          ++i;
-        }
-      } else {
-        freqs = null;
-        while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-          if (i >= docs.length) {
-            docs = ArrayUtil.grow(docs, docs.length + 1);
-          }
-          docs[i++] = docMap.oldToNew(doc);
+      for (int doc = in.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = in.nextDoc()) {
+        if (docs.length <= i) {
+          docs = ArrayUtil.grow(docs);
         }
+        docs[i++] = docMap.oldToNew(doc);
       }
-      // TimSort can save much time compared to other sorts in case of
-      // reverse sorting, or when sorting a concatenation of sorted readers
-      sorter.reset(docs, freqs);
-      sorter.sort(0, i);
-      upto = i;
+      upTo = i;
+      if (docs.length == upTo) {
+        docs = ArrayUtil.grow(docs);
+      }
+      docs[upTo] = DocIdSetIterator.NO_MORE_DOCS;
+      final int numBits = PackedInts.bitsRequired(Math.max(0, maxDoc - 1));
+      // Even though LSBRadixSorter cannot take advantage of partial ordering like TimSorter it is
+      // often still faster for nearly-sorted inputs.
+      sorter.sort(numBits, docs, upTo);
     }
 
-    // for testing
-    boolean reused(PostingsEnum other) {
-      if (other == null || !(other instanceof SortingDocsEnum)) {
-        return false;
-      }
-      return docs == ((SortingDocsEnum) other).docs;
+    PostingsEnum getWrapped() {
+      return in;
     }
 
     @Override
@@ -379,27 +272,24 @@ final class FreqProxTermsWriter extends TermsHash {
 
     @Override
     public int docID() {
-      return docIt < 0 ? -1 : docIt >= upto ? NO_MORE_DOCS : docs[docIt];
+      return docIt < 0 ? -1 : docs[docIt];
     }
 
     @Override
-    public int freq() throws IOException {
-      return withFreqs && docIt < upto ? freqs[docIt] : 1;
+    public int nextDoc() throws IOException {
+      return docs[++docIt];
     }
 
     @Override
-    public int nextDoc() throws IOException {
-      if (++docIt >= upto) return NO_MORE_DOCS;
-      return docs[docIt];
+    public long cost() {
+      return upTo;
     }
 
-    /** Returns the wrapped {@link PostingsEnum}. */
-    PostingsEnum getWrapped() {
-      return in;
+    @Override
+    public int freq() throws IOException {
+      return 1;
     }
 
-    // we buffer up docs/freqs only, don't forward any positions requests to underlying enum
-
     @Override
     public int nextPosition() throws IOException {
       return -1;
@@ -496,7 +386,7 @@ final class FreqProxTermsWriter extends TermsHash {
     private final int upto;
 
     private final ByteBuffersDataInput postingInput;
-    private final boolean storeOffsets;
+    private final boolean storePositions, storeOffsets;
 
     private int docIt = -1;
     private int pos;
@@ -512,10 +402,12 @@ final class FreqProxTermsWriter extends TermsHash {
         SortingPostingsEnum reuse,
         final PostingsEnum in,
         Sorter.DocMap docMap,
+        boolean storePositions,
         boolean storeOffsets)
         throws IOException {
       super(in);
       this.maxDoc = maxDoc;
+      this.storePositions = storePositions;
       this.storeOffsets = storeOffsets;
       if (reuse != null) {
         docs = reuse.docs;
@@ -556,37 +448,31 @@ final class FreqProxTermsWriter extends TermsHash {
       this.postingInput = buffer.toDataInput();
     }
 
-    // for testing
-    boolean reused(PostingsEnum other) {
-      if (other == null || !(other instanceof SortingPostingsEnum)) {
-        return false;
-      }
-      return docs == ((SortingPostingsEnum) other).docs;
-    }
-
     private void addPositions(final PostingsEnum in, final DataOutput out) throws IOException {
       int freq = in.freq();
       out.writeVInt(freq);
-      int previousPosition = 0;
-      int previousEndOffset = 0;
-      for (int i = 0; i < freq; i++) {
-        final int pos = in.nextPosition();
-        final BytesRef payload = in.getPayload();
-        // The low-order bit of token is set only if there is a payload, the
-        // previous bits are the delta-encoded position.
-        final int token = (pos - previousPosition) << 1 | (payload == null ? 0 : 1);
-        out.writeVInt(token);
-        previousPosition = pos;
-        if (storeOffsets) { // don't encode offsets if they are not stored
-          final int startOffset = in.startOffset();
-          final int endOffset = in.endOffset();
-          out.writeVInt(startOffset - previousEndOffset);
-          out.writeVInt(endOffset - startOffset);
-          previousEndOffset = endOffset;
-        }
-        if (payload != null) {
-          out.writeVInt(payload.length);
-          out.writeBytes(payload.bytes, payload.offset, payload.length);
+      if (storePositions) {
+        int previousPosition = 0;
+        int previousEndOffset = 0;
+        for (int i = 0; i < freq; i++) {
+          final int pos = in.nextPosition();
+          final BytesRef payload = in.getPayload();
+          // The low-order bit of token is set only if there is a payload, the
+          // previous bits are the delta-encoded position.
+          final int token = (pos - previousPosition) << 1 | (payload == null ? 0 : 1);
+          out.writeVInt(token);
+          previousPosition = pos;
+          if (storeOffsets) { // don't encode offsets if they are not stored
+            final int startOffset = in.startOffset();
+            final int endOffset = in.endOffset();
+            out.writeVInt(startOffset - previousEndOffset);
+            out.writeVInt(endOffset - startOffset);
+            previousEndOffset = endOffset;
+          }
+          if (payload != null) {
+            out.writeVInt(payload.length);
+            out.writeBytes(payload.bytes, payload.offset, payload.length);
+          }
         }
       }
     }
@@ -631,6 +517,9 @@ final class FreqProxTermsWriter extends TermsHash {
 
     @Override
     public int nextPosition() throws IOException {
+      if (storePositions == false) {
+        return -1;
+      }
       final int token = postingInput.readVInt();
       pos += token >>> 1;
       if (storeOffsets) {
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
index df37ac70007..df051f3244b 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
@@ -62,6 +62,7 @@ import org.apache.lucene.document.StoredField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.search.CollectionStatistics;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
@@ -83,6 +84,7 @@ import org.apache.lucene.tests.index.RandomIndexWriter;
 import org.apache.lucene.tests.util.LuceneTestCase;
 import org.apache.lucene.tests.util.TestUtil;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.NumericUtils;
@@ -2905,4 +2907,270 @@ public class TestIndexSorting extends LuceneTestCase {
     w.close();
     dir.close();
   }
+
+  public void testSortDocs() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriterConfig config = newIndexWriterConfig();
+    config.setIndexSort(new Sort(new SortField("sort", SortField.Type.LONG)));
+    IndexWriter w = new IndexWriter(dir, config);
+    Document doc = new Document();
+    NumericDocValuesField sort = new NumericDocValuesField("sort", 0L);
+    doc.add(sort);
+    StringField field = new StringField("field", "a", Field.Store.NO);
+    doc.add(field);
+    w.addDocument(doc);
+    sort.setLongValue(1);
+    field.setStringValue("b");
+    w.addDocument(doc);
+    sort.setLongValue(-1);
+    field.setStringValue("a");
+    w.addDocument(doc);
+    sort.setLongValue(2);
+    field.setStringValue("a");
+    w.addDocument(doc);
+    sort.setLongValue(3);
+    field.setStringValue("b");
+    w.addDocument(doc);
+    w.forceMerge(1);
+    DirectoryReader reader = DirectoryReader.open(w);
+    w.close();
+    LeafReader leafReader = getOnlyLeafReader(reader);
+    TermsEnum fieldTerms = leafReader.terms("field").iterator();
+    assertEquals(new BytesRef("a"), fieldTerms.next());
+    PostingsEnum postings = fieldTerms.postings(null, PostingsEnum.ALL);
+    assertEquals(0, postings.nextDoc());
+    assertEquals(1, postings.nextDoc());
+    assertEquals(3, postings.nextDoc());
+    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
+    assertEquals(new BytesRef("b"), fieldTerms.next());
+    postings = fieldTerms.postings(postings, PostingsEnum.ALL);
+    assertEquals(2, postings.nextDoc());
+    assertEquals(4, postings.nextDoc());
+    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
+    assertNull(fieldTerms.next());
+    reader.close();
+    dir.close();
+  }
+
+  public void testSortDocsAndFreqs() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriterConfig config = newIndexWriterConfig();
+    config.setIndexSort(new Sort(new SortField("sort", SortField.Type.LONG)));
+    IndexWriter w = new IndexWriter(dir, config);
+    FieldType ft = new FieldType();
+    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
+    ft.setTokenized(false);
+    ft.freeze();
+    Document doc = new Document();
+    doc.add(new NumericDocValuesField("sort", 0L));
+    doc.add(new Field("field", "a", ft));
+    doc.add(new Field("field", "a", ft));
+    w.addDocument(doc);
+    doc = new Document();
+    doc.add(new NumericDocValuesField("sort", 1L));
+    doc.add(new Field("field", "b", ft));
+    w.addDocument(doc);
+    doc = new Document();
+    doc.add(new NumericDocValuesField("sort", -1L));
+    doc.add(new Field("field", "a", ft));
+    doc.add(new Field("field", "a", ft));
+    doc.add(new Field("field", "a", ft));
+    w.addDocument(doc);
+    doc = new Document();
+    doc.add(new NumericDocValuesField("sort", 2L));
+    doc.add(new Field("field", "a", ft));
+    w.addDocument(doc);
+    doc = new Document();
+    doc.add(new NumericDocValuesField("sort", 3L));
+    doc.add(new Field("field", "b", ft));
+    doc.add(new Field("field", "b", ft));
+    doc.add(new Field("field", "b", ft));
+    w.addDocument(doc);
+    w.forceMerge(1);
+    DirectoryReader reader = DirectoryReader.open(w);
+    w.close();
+    LeafReader leafReader = getOnlyLeafReader(reader);
+    TermsEnum fieldTerms = leafReader.terms("field").iterator();
+    assertEquals(new BytesRef("a"), fieldTerms.next());
+    PostingsEnum postings = fieldTerms.postings(null, PostingsEnum.ALL);
+    assertEquals(0, postings.nextDoc());
+    assertEquals(3, postings.freq());
+    assertEquals(1, postings.nextDoc());
+    assertEquals(2, postings.freq());
+    assertEquals(3, postings.nextDoc());
+    assertEquals(1, postings.freq());
+    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
+    assertEquals(new BytesRef("b"), fieldTerms.next());
+    postings = fieldTerms.postings(postings, PostingsEnum.ALL);
+    assertEquals(2, postings.nextDoc());
+    assertEquals(1, postings.freq());
+    assertEquals(4, postings.nextDoc());
+    assertEquals(3, postings.freq());
+    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
+    assertNull(fieldTerms.next());
+    reader.close();
+    dir.close();
+  }
+
+  public void testSortDocsAndFreqsAndPositions() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
+    config.setIndexSort(new Sort(new SortField("sort", SortField.Type.LONG)));
+    IndexWriter w = new IndexWriter(dir, config);
+    FieldType ft = new FieldType();
+    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
+    ft.setTokenized(true);
+    ft.freeze();
+    Document doc = new Document();
+    doc.add(new NumericDocValuesField("sort", 0L));
+    doc.add(new Field("field", "a a b", ft));
+    w.addDocument(doc);
+    doc = new Document();
+    doc.add(new NumericDocValuesField("sort", 1L));
+    doc.add(new Field("field", "b", ft));
+    w.addDocument(doc);
+    doc = new Document();
+    doc.add(new NumericDocValuesField("sort", -1L));
+    doc.add(new Field("field", "b a b b", ft));
+    w.addDocument(doc);
+    doc = new Document();
+    doc.add(new NumericDocValuesField("sort", 2L));
+    doc.add(new Field("field", "a", ft));
+    w.addDocument(doc);
+    doc = new Document();
+    doc.add(new NumericDocValuesField("sort", 3L));
+    doc.add(new Field("field", "b b", ft));
+    w.addDocument(doc);
+    w.forceMerge(1);
+    DirectoryReader reader = DirectoryReader.open(w);
+    w.close();
+    LeafReader leafReader = getOnlyLeafReader(reader);
+    TermsEnum fieldTerms = leafReader.terms("field").iterator();
+    assertEquals(new BytesRef("a"), fieldTerms.next());
+    PostingsEnum postings = fieldTerms.postings(null, PostingsEnum.ALL);
+    assertEquals(0, postings.nextDoc());
+    assertEquals(1, postings.freq());
+    assertEquals(1, postings.nextPosition());
+    assertEquals(1, postings.nextDoc());
+    assertEquals(2, postings.freq());
+    assertEquals(0, postings.nextPosition());
+    assertEquals(1, postings.nextPosition());
+    assertEquals(3, postings.nextDoc());
+    assertEquals(1, postings.freq());
+    assertEquals(0, postings.nextPosition());
+    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
+    assertEquals(new BytesRef("b"), fieldTerms.next());
+    postings = fieldTerms.postings(postings, PostingsEnum.ALL);
+    assertEquals(0, postings.nextDoc());
+    assertEquals(3, postings.freq());
+    assertEquals(0, postings.nextPosition());
+    assertEquals(2, postings.nextPosition());
+    assertEquals(3, postings.nextPosition());
+    assertEquals(1, postings.nextDoc());
+    assertEquals(1, postings.freq());
+    assertEquals(2, postings.nextPosition());
+    assertEquals(2, postings.nextDoc());
+    assertEquals(1, postings.freq());
+    assertEquals(0, postings.nextPosition());
+    assertEquals(4, postings.nextDoc());
+    assertEquals(2, postings.freq());
+    assertEquals(0, postings.nextPosition());
+    assertEquals(1, postings.nextPosition());
+    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
+    assertNull(fieldTerms.next());
+    reader.close();
+    dir.close();
+  }
+
+  public void testSortDocsAndFreqsAndPositionsAndOffsets() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
+    config.setIndexSort(new Sort(new SortField("sort", SortField.Type.LONG)));
+    IndexWriter w = new IndexWriter(dir, config);
+    FieldType ft = new FieldType();
+    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    ft.setTokenized(true);
+    ft.freeze();
+    Document doc = new Document();
+    doc.add(new NumericDocValuesField("sort", 0L));
+    doc.add(new Field("field", "a a b", ft));
+    w.addDocument(doc);
+    doc = new Document();
+    doc.add(new NumericDocValuesField("sort", 1L));
+    doc.add(new Field("field", "b", ft));
+    w.addDocument(doc);
+    doc = new Document();
+    doc.add(new NumericDocValuesField("sort", -1L));
+    doc.add(new Field("field", "b a b b", ft));
+    w.addDocument(doc);
+    doc = new Document();
+    doc.add(new NumericDocValuesField("sort", 2L));
+    doc.add(new Field("field", "a", ft));
+    w.addDocument(doc);
+    doc = new Document();
+    doc.add(new NumericDocValuesField("sort", 3L));
+    doc.add(new Field("field", "b b", ft));
+    w.addDocument(doc);
+    w.forceMerge(1);
+    DirectoryReader reader = DirectoryReader.open(w);
+    w.close();
+    LeafReader leafReader = getOnlyLeafReader(reader);
+    TermsEnum fieldTerms = leafReader.terms("field").iterator();
+    assertEquals(new BytesRef("a"), fieldTerms.next());
+    PostingsEnum postings = fieldTerms.postings(null, PostingsEnum.ALL);
+    assertEquals(0, postings.nextDoc());
+    assertEquals(1, postings.freq());
+    assertEquals(1, postings.nextPosition());
+    assertEquals(2, postings.startOffset());
+    assertEquals(3, postings.endOffset());
+    assertEquals(1, postings.nextDoc());
+    assertEquals(2, postings.freq());
+    assertEquals(0, postings.nextPosition());
+    assertEquals(0, postings.startOffset());
+    assertEquals(1, postings.endOffset());
+    assertEquals(1, postings.nextPosition());
+    assertEquals(2, postings.startOffset());
+    assertEquals(3, postings.endOffset());
+    assertEquals(3, postings.nextDoc());
+    assertEquals(1, postings.freq());
+    assertEquals(0, postings.nextPosition());
+    assertEquals(0, postings.startOffset());
+    assertEquals(1, postings.endOffset());
+    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
+    assertEquals(new BytesRef("b"), fieldTerms.next());
+    postings = fieldTerms.postings(postings, PostingsEnum.ALL);
+    assertEquals(0, postings.nextDoc());
+    assertEquals(3, postings.freq());
+    assertEquals(0, postings.nextPosition());
+    assertEquals(0, postings.startOffset());
+    assertEquals(1, postings.endOffset());
+    assertEquals(2, postings.nextPosition());
+    assertEquals(4, postings.startOffset());
+    assertEquals(5, postings.endOffset());
+    assertEquals(3, postings.nextPosition());
+    assertEquals(6, postings.startOffset());
+    assertEquals(7, postings.endOffset());
+    assertEquals(1, postings.nextDoc());
+    assertEquals(1, postings.freq());
+    assertEquals(2, postings.nextPosition());
+    assertEquals(4, postings.startOffset());
+    assertEquals(5, postings.endOffset());
+    assertEquals(2, postings.nextDoc());
+    assertEquals(1, postings.freq());
+    assertEquals(0, postings.nextPosition());
+    assertEquals(0, postings.startOffset());
+    assertEquals(1, postings.endOffset());
+    assertEquals(4, postings.nextDoc());
+    assertEquals(2, postings.freq());
+    assertEquals(0, postings.nextPosition());
+    assertEquals(0, postings.startOffset());
+    assertEquals(1, postings.endOffset());
+    assertEquals(1, postings.nextPosition());
+    assertEquals(2, postings.startOffset());
+    assertEquals(3, postings.endOffset());
+    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
+    assertNull(fieldTerms.next());
+    reader.close();
+    dir.close();
+  }
 }