You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2023/03/15 10:56:51 UTC
[lucene] branch main updated: Use radix sort to sort postings when index sorting is enabled. (#12114)
This is an automated email from the ASF dual-hosted git repository.
jpountz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 805eb0b613e Use radix sort to sort postings when index sorting is enabled. (#12114)
805eb0b613e is described below
commit 805eb0b613e9711994da3ccafff8fca3a36ed382
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Wed Mar 15 11:56:45 2023 +0100
Use radix sort to sort postings when index sorting is enabled. (#12114)
This switches from TimSorter to LSBRadixSorter for sorting postings whose
index options are `DOCS`. On a synthetic benchmark this made barely any
difference when the index order is the same as the sort order or its
reverse, but gave an almost 3x speedup for writing postings when the
index order is mostly random.
---
.../apache/lucene/index/FreqProxTermsWriter.java | 257 ++++++--------------
.../org/apache/lucene/index/TestIndexSorting.java | 268 +++++++++++++++++++++
2 files changed, 341 insertions(+), 184 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
index ce5de0f3974..5ba0df2bed0 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
@@ -35,9 +35,11 @@ import org.apache.lucene.util.Counter;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IntBlockPool;
import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.LSBRadixSorter;
import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.TimSorter;
import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.packed.PackedInts;
final class FreqProxTermsWriter extends TermsHash {
@@ -153,13 +155,12 @@ final class FreqProxTermsWriter extends TermsHash {
@Override
public TermsEnum iterator() throws IOException {
- return new SortingTermsEnum(in.iterator(), docMap, indexOptions, hasPositions());
+ return new SortingTermsEnum(in.iterator(), docMap, indexOptions);
}
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
- return new SortingTermsEnum(
- in.intersect(compiled, startTerm), docMap, indexOptions, hasPositions());
+ return new SortingTermsEnum(in.intersect(compiled, startTerm), docMap, indexOptions);
}
}
@@ -167,20 +168,18 @@ final class FreqProxTermsWriter extends TermsHash {
final Sorter.DocMap docMap; // pkg-protected to avoid synthetic accessor methods
private final IndexOptions indexOptions;
- private final boolean hasPositions;
- SortingTermsEnum(
- final TermsEnum in, Sorter.DocMap docMap, IndexOptions indexOptions, boolean hasPositions) {
+ SortingTermsEnum(final TermsEnum in, Sorter.DocMap docMap, IndexOptions indexOptions) {
super(in);
this.docMap = docMap;
this.indexOptions = indexOptions;
- this.hasPositions = hasPositions;
}
@Override
public PostingsEnum postings(PostingsEnum reuse, final int flags) throws IOException {
- if (hasPositions && PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS)) {
+ if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0
+ && PostingsEnum.featureRequested(flags, PostingsEnum.FREQS)) {
final PostingsEnum inReuse;
final SortingPostingsEnum wrapReuse;
if (reuse != null && reuse instanceof SortingPostingsEnum) {
@@ -194,14 +193,16 @@ final class FreqProxTermsWriter extends TermsHash {
}
final PostingsEnum inDocsAndPositions = in.postings(inReuse, flags);
- // we ignore the fact that offsets may be stored but not asked for,
+ // we ignore the fact that positions/offsets may be stored but not asked for,
// since this code is expected to be used during addIndexes which will
// ask for everything. if that assumption changes in the future, we can
// factor in whether 'flags' says offsets are not required.
+ final boolean storePositions =
+ indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
final boolean storeOffsets =
indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
return new SortingPostingsEnum(
- docMap.size(), wrapReuse, inDocsAndPositions, docMap, storeOffsets);
+ docMap.size(), wrapReuse, inDocsAndPositions, docMap, storePositions, storeOffsets);
}
final PostingsEnum inReuse;
@@ -213,161 +214,53 @@ final class FreqProxTermsWriter extends TermsHash {
inReuse = wrapReuse.getWrapped();
} else {
wrapReuse = null;
- inReuse = reuse;
+ inReuse = null;
}
final PostingsEnum inDocs = in.postings(inReuse, flags);
- final boolean withFreqs =
- indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0
- && PostingsEnum.featureRequested(flags, PostingsEnum.FREQS);
- return new SortingDocsEnum(docMap.size(), wrapReuse, inDocs, withFreqs, docMap);
+ return new SortingDocsEnum(docMap.size(), wrapReuse, inDocs, docMap);
}
}
- static class SortingDocsEnum extends FilterLeafReader.FilterPostingsEnum {
+ static class SortingDocsEnum extends PostingsEnum {
- private static final class DocFreqSorter extends TimSorter {
-
- private int[] docs;
- private int[] freqs;
- private int[] tmpDocs;
- private int[] tmpFreqs;
-
- DocFreqSorter(int maxDoc) {
- super(maxDoc / 8);
- this.tmpDocs = IntsRef.EMPTY_INTS;
- }
-
- public void reset(int[] docs, int[] freqs) {
- this.docs = docs;
- this.freqs = freqs;
- if (freqs != null && tmpFreqs == null) {
- tmpFreqs = new int[tmpDocs.length];
- }
- }
-
- @Override
- protected int compare(int i, int j) {
- return docs[i] - docs[j];
- }
-
- @Override
- protected void swap(int i, int j) {
- int tmpDoc = docs[i];
- docs[i] = docs[j];
- docs[j] = tmpDoc;
-
- if (freqs != null) {
- int tmpFreq = freqs[i];
- freqs[i] = freqs[j];
- freqs[j] = tmpFreq;
- }
- }
-
- @Override
- protected void copy(int src, int dest) {
- docs[dest] = docs[src];
- if (freqs != null) {
- freqs[dest] = freqs[src];
- }
- }
-
- @Override
- protected void save(int i, int len) {
- if (tmpDocs.length < len) {
- tmpDocs = new int[ArrayUtil.oversize(len, Integer.BYTES)];
- if (freqs != null) {
- tmpFreqs = new int[tmpDocs.length];
- }
- }
- System.arraycopy(docs, i, tmpDocs, 0, len);
- if (freqs != null) {
- System.arraycopy(freqs, i, tmpFreqs, 0, len);
- }
- }
-
- @Override
- protected void restore(int i, int j) {
- docs[j] = tmpDocs[i];
- if (freqs != null) {
- freqs[j] = tmpFreqs[i];
- }
- }
-
- @Override
- protected int compareSaved(int i, int j) {
- return tmpDocs[i] - docs[j];
- }
- }
-
- private final int maxDoc;
- private final DocFreqSorter sorter;
+ private final PostingsEnum in;
+ private final LSBRadixSorter sorter;
private int[] docs;
- private int[] freqs;
private int docIt = -1;
- private final int upto;
- private final boolean withFreqs;
+ private final int upTo;
SortingDocsEnum(
- int maxDoc,
- SortingDocsEnum reuse,
- final PostingsEnum in,
- boolean withFreqs,
- final Sorter.DocMap docMap)
+ int maxDoc, SortingDocsEnum reuse, final PostingsEnum in, final Sorter.DocMap docMap)
throws IOException {
- super(in);
- this.maxDoc = maxDoc;
- this.withFreqs = withFreqs;
if (reuse != null) {
- if (reuse.maxDoc == maxDoc) {
- sorter = reuse.sorter;
- } else {
- sorter = new DocFreqSorter(maxDoc);
- }
+ sorter = reuse.sorter;
docs = reuse.docs;
- freqs = reuse.freqs; // maybe null
} else {
- docs = new int[64];
- sorter = new DocFreqSorter(maxDoc);
+ sorter = new LSBRadixSorter();
+ docs = IntsRef.EMPTY_INTS;
}
- docIt = -1;
+ this.in = in;
int i = 0;
- int doc;
- if (withFreqs) {
- if (freqs == null || freqs.length < docs.length) {
- freqs = new int[docs.length];
- }
- while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- if (i >= docs.length) {
- docs = ArrayUtil.grow(docs, docs.length + 1);
- freqs = ArrayUtil.grow(freqs, freqs.length + 1);
- }
- docs[i] = docMap.oldToNew(doc);
- freqs[i] = in.freq();
- ++i;
- }
- } else {
- freqs = null;
- while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- if (i >= docs.length) {
- docs = ArrayUtil.grow(docs, docs.length + 1);
- }
- docs[i++] = docMap.oldToNew(doc);
+ for (int doc = in.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = in.nextDoc()) {
+ if (docs.length <= i) {
+ docs = ArrayUtil.grow(docs);
}
+ docs[i++] = docMap.oldToNew(doc);
}
- // TimSort can save much time compared to other sorts in case of
- // reverse sorting, or when sorting a concatenation of sorted readers
- sorter.reset(docs, freqs);
- sorter.sort(0, i);
- upto = i;
+ upTo = i;
+ if (docs.length == upTo) {
+ docs = ArrayUtil.grow(docs);
+ }
+ docs[upTo] = DocIdSetIterator.NO_MORE_DOCS;
+ final int numBits = PackedInts.bitsRequired(Math.max(0, maxDoc - 1));
+ // Even though LSBRadixSorter cannot take advantage of partial ordering like TimSorter it is
+ // often still faster for nearly-sorted inputs.
+ sorter.sort(numBits, docs, upTo);
}
- // for testing
- boolean reused(PostingsEnum other) {
- if (other == null || !(other instanceof SortingDocsEnum)) {
- return false;
- }
- return docs == ((SortingDocsEnum) other).docs;
+ PostingsEnum getWrapped() {
+ return in;
}
@Override
@@ -379,27 +272,24 @@ final class FreqProxTermsWriter extends TermsHash {
@Override
public int docID() {
- return docIt < 0 ? -1 : docIt >= upto ? NO_MORE_DOCS : docs[docIt];
+ return docIt < 0 ? -1 : docs[docIt];
}
@Override
- public int freq() throws IOException {
- return withFreqs && docIt < upto ? freqs[docIt] : 1;
+ public int nextDoc() throws IOException {
+ return docs[++docIt];
}
@Override
- public int nextDoc() throws IOException {
- if (++docIt >= upto) return NO_MORE_DOCS;
- return docs[docIt];
+ public long cost() {
+ return upTo;
}
- /** Returns the wrapped {@link PostingsEnum}. */
- PostingsEnum getWrapped() {
- return in;
+ @Override
+ public int freq() throws IOException {
+ return 1;
}
- // we buffer up docs/freqs only, don't forward any positions requests to underlying enum
-
@Override
public int nextPosition() throws IOException {
return -1;
@@ -496,7 +386,7 @@ final class FreqProxTermsWriter extends TermsHash {
private final int upto;
private final ByteBuffersDataInput postingInput;
- private final boolean storeOffsets;
+ private final boolean storePositions, storeOffsets;
private int docIt = -1;
private int pos;
@@ -512,10 +402,12 @@ final class FreqProxTermsWriter extends TermsHash {
SortingPostingsEnum reuse,
final PostingsEnum in,
Sorter.DocMap docMap,
+ boolean storePositions,
boolean storeOffsets)
throws IOException {
super(in);
this.maxDoc = maxDoc;
+ this.storePositions = storePositions;
this.storeOffsets = storeOffsets;
if (reuse != null) {
docs = reuse.docs;
@@ -556,37 +448,31 @@ final class FreqProxTermsWriter extends TermsHash {
this.postingInput = buffer.toDataInput();
}
- // for testing
- boolean reused(PostingsEnum other) {
- if (other == null || !(other instanceof SortingPostingsEnum)) {
- return false;
- }
- return docs == ((SortingPostingsEnum) other).docs;
- }
-
private void addPositions(final PostingsEnum in, final DataOutput out) throws IOException {
int freq = in.freq();
out.writeVInt(freq);
- int previousPosition = 0;
- int previousEndOffset = 0;
- for (int i = 0; i < freq; i++) {
- final int pos = in.nextPosition();
- final BytesRef payload = in.getPayload();
- // The low-order bit of token is set only if there is a payload, the
- // previous bits are the delta-encoded position.
- final int token = (pos - previousPosition) << 1 | (payload == null ? 0 : 1);
- out.writeVInt(token);
- previousPosition = pos;
- if (storeOffsets) { // don't encode offsets if they are not stored
- final int startOffset = in.startOffset();
- final int endOffset = in.endOffset();
- out.writeVInt(startOffset - previousEndOffset);
- out.writeVInt(endOffset - startOffset);
- previousEndOffset = endOffset;
- }
- if (payload != null) {
- out.writeVInt(payload.length);
- out.writeBytes(payload.bytes, payload.offset, payload.length);
+ if (storePositions) {
+ int previousPosition = 0;
+ int previousEndOffset = 0;
+ for (int i = 0; i < freq; i++) {
+ final int pos = in.nextPosition();
+ final BytesRef payload = in.getPayload();
+ // The low-order bit of token is set only if there is a payload, the
+ // previous bits are the delta-encoded position.
+ final int token = (pos - previousPosition) << 1 | (payload == null ? 0 : 1);
+ out.writeVInt(token);
+ previousPosition = pos;
+ if (storeOffsets) { // don't encode offsets if they are not stored
+ final int startOffset = in.startOffset();
+ final int endOffset = in.endOffset();
+ out.writeVInt(startOffset - previousEndOffset);
+ out.writeVInt(endOffset - startOffset);
+ previousEndOffset = endOffset;
+ }
+ if (payload != null) {
+ out.writeVInt(payload.length);
+ out.writeBytes(payload.bytes, payload.offset, payload.length);
+ }
}
}
}
@@ -631,6 +517,9 @@ final class FreqProxTermsWriter extends TermsHash {
@Override
public int nextPosition() throws IOException {
+ if (storePositions == false) {
+ return -1;
+ }
final int token = postingInput.readVInt();
pos += token >>> 1;
if (storeOffsets) {
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
index df37ac70007..df051f3244b 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
@@ -62,6 +62,7 @@ import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.CollectionStatistics;
+import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
@@ -83,6 +84,7 @@ import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.NumericUtils;
@@ -2905,4 +2907,270 @@ public class TestIndexSorting extends LuceneTestCase {
w.close();
dir.close();
}
+
+ public void testSortDocs() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriterConfig config = newIndexWriterConfig();
+ config.setIndexSort(new Sort(new SortField("sort", SortField.Type.LONG)));
+ IndexWriter w = new IndexWriter(dir, config);
+ Document doc = new Document();
+ NumericDocValuesField sort = new NumericDocValuesField("sort", 0L);
+ doc.add(sort);
+ StringField field = new StringField("field", "a", Field.Store.NO);
+ doc.add(field);
+ w.addDocument(doc);
+ sort.setLongValue(1);
+ field.setStringValue("b");
+ w.addDocument(doc);
+ sort.setLongValue(-1);
+ field.setStringValue("a");
+ w.addDocument(doc);
+ sort.setLongValue(2);
+ field.setStringValue("a");
+ w.addDocument(doc);
+ sort.setLongValue(3);
+ field.setStringValue("b");
+ w.addDocument(doc);
+ w.forceMerge(1);
+ DirectoryReader reader = DirectoryReader.open(w);
+ w.close();
+ LeafReader leafReader = getOnlyLeafReader(reader);
+ TermsEnum fieldTerms = leafReader.terms("field").iterator();
+ assertEquals(new BytesRef("a"), fieldTerms.next());
+ PostingsEnum postings = fieldTerms.postings(null, PostingsEnum.ALL);
+ assertEquals(0, postings.nextDoc());
+ assertEquals(1, postings.nextDoc());
+ assertEquals(3, postings.nextDoc());
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
+ assertEquals(new BytesRef("b"), fieldTerms.next());
+ postings = fieldTerms.postings(postings, PostingsEnum.ALL);
+ assertEquals(2, postings.nextDoc());
+ assertEquals(4, postings.nextDoc());
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
+ assertNull(fieldTerms.next());
+ reader.close();
+ dir.close();
+ }
+
+ public void testSortDocsAndFreqs() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriterConfig config = newIndexWriterConfig();
+ config.setIndexSort(new Sort(new SortField("sort", SortField.Type.LONG)));
+ IndexWriter w = new IndexWriter(dir, config);
+ FieldType ft = new FieldType();
+ ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
+ ft.setTokenized(false);
+ ft.freeze();
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("sort", 0L));
+ doc.add(new Field("field", "a", ft));
+ doc.add(new Field("field", "a", ft));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(new NumericDocValuesField("sort", 1L));
+ doc.add(new Field("field", "b", ft));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(new NumericDocValuesField("sort", -1L));
+ doc.add(new Field("field", "a", ft));
+ doc.add(new Field("field", "a", ft));
+ doc.add(new Field("field", "a", ft));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(new NumericDocValuesField("sort", 2L));
+ doc.add(new Field("field", "a", ft));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(new NumericDocValuesField("sort", 3L));
+ doc.add(new Field("field", "b", ft));
+ doc.add(new Field("field", "b", ft));
+ doc.add(new Field("field", "b", ft));
+ w.addDocument(doc);
+ w.forceMerge(1);
+ DirectoryReader reader = DirectoryReader.open(w);
+ w.close();
+ LeafReader leafReader = getOnlyLeafReader(reader);
+ TermsEnum fieldTerms = leafReader.terms("field").iterator();
+ assertEquals(new BytesRef("a"), fieldTerms.next());
+ PostingsEnum postings = fieldTerms.postings(null, PostingsEnum.ALL);
+ assertEquals(0, postings.nextDoc());
+ assertEquals(3, postings.freq());
+ assertEquals(1, postings.nextDoc());
+ assertEquals(2, postings.freq());
+ assertEquals(3, postings.nextDoc());
+ assertEquals(1, postings.freq());
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
+ assertEquals(new BytesRef("b"), fieldTerms.next());
+ postings = fieldTerms.postings(postings, PostingsEnum.ALL);
+ assertEquals(2, postings.nextDoc());
+ assertEquals(1, postings.freq());
+ assertEquals(4, postings.nextDoc());
+ assertEquals(3, postings.freq());
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
+ assertNull(fieldTerms.next());
+ reader.close();
+ dir.close();
+ }
+
+ public void testSortDocsAndFreqsAndPositions() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
+ config.setIndexSort(new Sort(new SortField("sort", SortField.Type.LONG)));
+ IndexWriter w = new IndexWriter(dir, config);
+ FieldType ft = new FieldType();
+ ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
+ ft.setTokenized(true);
+ ft.freeze();
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("sort", 0L));
+ doc.add(new Field("field", "a a b", ft));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(new NumericDocValuesField("sort", 1L));
+ doc.add(new Field("field", "b", ft));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(new NumericDocValuesField("sort", -1L));
+ doc.add(new Field("field", "b a b b", ft));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(new NumericDocValuesField("sort", 2L));
+ doc.add(new Field("field", "a", ft));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(new NumericDocValuesField("sort", 3L));
+ doc.add(new Field("field", "b b", ft));
+ w.addDocument(doc);
+ w.forceMerge(1);
+ DirectoryReader reader = DirectoryReader.open(w);
+ w.close();
+ LeafReader leafReader = getOnlyLeafReader(reader);
+ TermsEnum fieldTerms = leafReader.terms("field").iterator();
+ assertEquals(new BytesRef("a"), fieldTerms.next());
+ PostingsEnum postings = fieldTerms.postings(null, PostingsEnum.ALL);
+ assertEquals(0, postings.nextDoc());
+ assertEquals(1, postings.freq());
+ assertEquals(1, postings.nextPosition());
+ assertEquals(1, postings.nextDoc());
+ assertEquals(2, postings.freq());
+ assertEquals(0, postings.nextPosition());
+ assertEquals(1, postings.nextPosition());
+ assertEquals(3, postings.nextDoc());
+ assertEquals(1, postings.freq());
+ assertEquals(0, postings.nextPosition());
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
+ assertEquals(new BytesRef("b"), fieldTerms.next());
+ postings = fieldTerms.postings(postings, PostingsEnum.ALL);
+ assertEquals(0, postings.nextDoc());
+ assertEquals(3, postings.freq());
+ assertEquals(0, postings.nextPosition());
+ assertEquals(2, postings.nextPosition());
+ assertEquals(3, postings.nextPosition());
+ assertEquals(1, postings.nextDoc());
+ assertEquals(1, postings.freq());
+ assertEquals(2, postings.nextPosition());
+ assertEquals(2, postings.nextDoc());
+ assertEquals(1, postings.freq());
+ assertEquals(0, postings.nextPosition());
+ assertEquals(4, postings.nextDoc());
+ assertEquals(2, postings.freq());
+ assertEquals(0, postings.nextPosition());
+ assertEquals(1, postings.nextPosition());
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
+ assertNull(fieldTerms.next());
+ reader.close();
+ dir.close();
+ }
+
+ public void testSortDocsAndFreqsAndPositionsAndOffsets() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
+ config.setIndexSort(new Sort(new SortField("sort", SortField.Type.LONG)));
+ IndexWriter w = new IndexWriter(dir, config);
+ FieldType ft = new FieldType();
+ ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+ ft.setTokenized(true);
+ ft.freeze();
+ Document doc = new Document();
+ doc.add(new NumericDocValuesField("sort", 0L));
+ doc.add(new Field("field", "a a b", ft));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(new NumericDocValuesField("sort", 1L));
+ doc.add(new Field("field", "b", ft));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(new NumericDocValuesField("sort", -1L));
+ doc.add(new Field("field", "b a b b", ft));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(new NumericDocValuesField("sort", 2L));
+ doc.add(new Field("field", "a", ft));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(new NumericDocValuesField("sort", 3L));
+ doc.add(new Field("field", "b b", ft));
+ w.addDocument(doc);
+ w.forceMerge(1);
+ DirectoryReader reader = DirectoryReader.open(w);
+ w.close();
+ LeafReader leafReader = getOnlyLeafReader(reader);
+ TermsEnum fieldTerms = leafReader.terms("field").iterator();
+ assertEquals(new BytesRef("a"), fieldTerms.next());
+ PostingsEnum postings = fieldTerms.postings(null, PostingsEnum.ALL);
+ assertEquals(0, postings.nextDoc());
+ assertEquals(1, postings.freq());
+ assertEquals(1, postings.nextPosition());
+ assertEquals(2, postings.startOffset());
+ assertEquals(3, postings.endOffset());
+ assertEquals(1, postings.nextDoc());
+ assertEquals(2, postings.freq());
+ assertEquals(0, postings.nextPosition());
+ assertEquals(0, postings.startOffset());
+ assertEquals(1, postings.endOffset());
+ assertEquals(1, postings.nextPosition());
+ assertEquals(2, postings.startOffset());
+ assertEquals(3, postings.endOffset());
+ assertEquals(3, postings.nextDoc());
+ assertEquals(1, postings.freq());
+ assertEquals(0, postings.nextPosition());
+ assertEquals(0, postings.startOffset());
+ assertEquals(1, postings.endOffset());
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
+ assertEquals(new BytesRef("b"), fieldTerms.next());
+ postings = fieldTerms.postings(postings, PostingsEnum.ALL);
+ assertEquals(0, postings.nextDoc());
+ assertEquals(3, postings.freq());
+ assertEquals(0, postings.nextPosition());
+ assertEquals(0, postings.startOffset());
+ assertEquals(1, postings.endOffset());
+ assertEquals(2, postings.nextPosition());
+ assertEquals(4, postings.startOffset());
+ assertEquals(5, postings.endOffset());
+ assertEquals(3, postings.nextPosition());
+ assertEquals(6, postings.startOffset());
+ assertEquals(7, postings.endOffset());
+ assertEquals(1, postings.nextDoc());
+ assertEquals(1, postings.freq());
+ assertEquals(2, postings.nextPosition());
+ assertEquals(4, postings.startOffset());
+ assertEquals(5, postings.endOffset());
+ assertEquals(2, postings.nextDoc());
+ assertEquals(1, postings.freq());
+ assertEquals(0, postings.nextPosition());
+ assertEquals(0, postings.startOffset());
+ assertEquals(1, postings.endOffset());
+ assertEquals(4, postings.nextDoc());
+ assertEquals(2, postings.freq());
+ assertEquals(0, postings.nextPosition());
+ assertEquals(0, postings.startOffset());
+ assertEquals(1, postings.endOffset());
+ assertEquals(1, postings.nextPosition());
+ assertEquals(2, postings.startOffset());
+ assertEquals(3, postings.endOffset());
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
+ assertNull(fieldTerms.next());
+ reader.close();
+ dir.close();
+ }
}