Posted to commits@lucene.apache.org by si...@apache.org on 2019/04/04 15:11:15 UTC
[lucene-solr] branch branch_8x updated: LUCENE-8671: Expose FST
off/on-heap options on Lucene50PostingsFormat (#613)
This is an automated email from the ASF dual-hosted git repository.
simonw pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8x by this push:
new 28e8a30 LUCENE-8671: Expose FST off/on-heap options on Lucene50PostingsFormat (#613)
28e8a30 is described below
commit 28e8a30b536a39e5539ac6e8b9407d31724c8857
Author: Simon Willnauer <si...@apache.org>
AuthorDate: Thu Apr 4 16:59:37 2019 +0200
LUCENE-8671: Expose FST off/on-heap options on Lucene50PostingsFormat (#613)
Before we can expose options to configure this postings format
on a per-reader basis, we need to expose the option to load the terms
index FST on or off heap on the postings format itself. This already lets
an expert user change the default through a per-field postings format,
and effectively provides a way to change the default globally, although
some glue code is still required.
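
As a usage sketch (not part of this patch; the analyzer variable and the "id"
field name are placeholders), an expert user could change the default globally
by constructing the codec with an explicit FSTLoadMode, and override it for a
single field via getPostingsFormatForField:

    import org.apache.lucene.codecs.PostingsFormat;
    import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
    import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
    import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.FSTLoadMode;
    import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
    import org.apache.lucene.codecs.lucene80.Lucene80Codec;
    import org.apache.lucene.index.IndexWriterConfig;

    // Global default: read all terms index FSTs off heap.
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer)
        .setCodec(new Lucene80Codec(Mode.BEST_SPEED, FSTLoadMode.OFF_HEAP) {
          @Override
          public PostingsFormat getPostingsFormatForField(String field) {
            // Per-field override: keep the primary-key-like "id" field on heap
            // for better update performance.
            if ("id".equals(field)) {
              return new Lucene50PostingsFormat(
                  BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE,
                  BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE,
                  FSTLoadMode.ON_HEAP);
            }
            return super.getPostingsFormatForField(field);
          }
        });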
---
.../codecs/blocktree/BlockTreeTermsReader.java | 6 +-
.../lucene/codecs/blocktree/FieldReader.java | 34 ++++--
.../codecs/lucene50/Lucene50PostingsFormat.java | 53 +++++++--
.../lucene/codecs/lucene80/Lucene80Codec.java | 11 +-
.../codecs/lucene50/TestBlockPostingsFormat.java | 118 ++++++++++++++++++++-
...tLucene50StoredFieldsFormatHighCompression.java | 12 ++-
.../codecs/cheapbastard/CheapBastardCodec.java | 3 +-
.../mockrandom/MockRandomPostingsFormat.java | 3 +-
.../java/org/apache/lucene/index/RandomCodec.java | 5 +-
.../util/TestRuleSetupAndRestoreClassEnv.java | 4 +-
.../src/java/org/apache/lucene/util/TestUtil.java | 4 +-
.../org/apache/solr/core/SchemaCodecFactory.java | 3 +-
12 files changed, 220 insertions(+), 36 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java
index 2f7689c..eb341a0 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java
@@ -28,6 +28,7 @@ import java.util.TreeMap;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsReaderBase;
+import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
@@ -118,7 +119,7 @@ public final class BlockTreeTermsReader extends FieldsProducer {
final int version;
/** Sole constructor. */
- public BlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state) throws IOException {
+ public BlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, Lucene50PostingsFormat.FSTLoadMode fstLoadMode) throws IOException {
boolean success = false;
this.postingsReader = postingsReader;
@@ -159,7 +160,6 @@ public final class BlockTreeTermsReader extends FieldsProducer {
if (numFields < 0) {
throw new CorruptIndexException("invalid numFields: " + numFields, termsIn);
}
-
for (int i = 0; i < numFields; ++i) {
final int field = termsIn.readVInt();
final long numTerms = termsIn.readVLong();
@@ -193,7 +193,7 @@ public final class BlockTreeTermsReader extends FieldsProducer {
final long indexStartFP = indexIn.readVLong();
FieldReader previous = fields.put(fieldInfo.name,
new FieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount,
- indexStartFP, longsSize, indexIn, minTerm, maxTerm, state.openedFromWriter));
+ indexStartFP, longsSize, indexIn, minTerm, maxTerm, state.openedFromWriter, fstLoadMode));
if (previous != null) {
throw new CorruptIndexException("duplicate field: " + fieldInfo.name, termsIn);
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
index 751ea42..94dd1dc 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
@@ -16,11 +16,11 @@
*/
package org.apache.lucene.codecs.blocktree;
-
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
+import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Terms;
@@ -67,14 +67,14 @@ public final class FieldReader extends Terms implements Accountable {
//private boolean DEBUG;
FieldReader(BlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms, BytesRef rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount,
- long indexStartFP, int longsSize, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm, boolean openedFromWriter) throws IOException {
+ long indexStartFP, int longsSize, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm, boolean openedFromWriter, Lucene50PostingsFormat.FSTLoadMode fstLoadMode) throws IOException {
assert numTerms > 0;
this.fieldInfo = fieldInfo;
//DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
this.parent = parent;
this.numTerms = numTerms;
- this.sumTotalTermFreq = sumTotalTermFreq;
- this.sumDocFreq = sumDocFreq;
+ this.sumTotalTermFreq = sumTotalTermFreq;
+ this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
this.indexStartFP = indexStartFP;
this.rootCode = rootCode;
@@ -84,21 +84,33 @@ public final class FieldReader extends Terms implements Accountable {
// if (DEBUG) {
// System.out.println("BTTR: seg=" + segment + " field=" + fieldInfo.name + " rootBlockCode=" + rootCode + " divisor=" + indexDivisor);
// }
-
rootBlockFP = (new ByteArrayDataInput(rootCode.bytes, rootCode.offset, rootCode.length)).readVLong() >>> BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS;
-
+ // Decide whether the terms index FST is loaded off heap, based on the configured
+ // load mode; AUTO additionally requires an MMapDirectory input, and both AUTO and
+ // OPTIMIZE_UPDATES_OFF_HEAP keep primary-key-like fields (docCount == sumDocFreq)
+ // on heap when the reader is opened from an IndexWriter
if (indexIn != null) {
+ switch (fstLoadMode) {
+ case ON_HEAP:
+ isFSTOffHeap = false;
+ break;
+ case OFF_HEAP:
+ isFSTOffHeap = true;
+ break;
+ case OPTIMIZE_UPDATES_OFF_HEAP:
+ isFSTOffHeap = ((this.docCount != this.sumDocFreq) || openedFromWriter == false);
+ break;
+ case AUTO:
+ isFSTOffHeap = ((this.docCount != this.sumDocFreq) || openedFromWriter == false) && indexIn instanceof ByteBufferIndexInput;
+ break;
+ default:
+ throw new IllegalStateException("unknown enum constant: " + fstLoadMode);
+ }
final IndexInput clone = indexIn.clone();
clone.seek(indexStartFP);
- // Initialize FST offheap if index is MMapDirectory and
- // docCount != sumDocFreq implying field is not primary key
- isFSTOffHeap = clone instanceof ByteBufferIndexInput && ((this.docCount != this.sumDocFreq) || openedFromWriter == false);
if (isFSTOffHeap) {
index = new FST<>(clone, ByteSequenceOutputs.getSingleton(), new OffHeapFSTStore());
} else {
index = new FST<>(clone, ByteSequenceOutputs.getSingleton());
}
-
/*
if (false) {
final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
@@ -109,8 +121,8 @@ public final class FieldReader extends Terms implements Accountable {
}
*/
} else {
- index = null;
isFSTOffHeap = false;
+ index = null;
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsFormat.java
index a99894a..32f89eb 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsFormat.java
@@ -16,8 +16,6 @@
*/
package org.apache.lucene.codecs.lucene50;
-
-
import java.io.IOException;
import org.apache.lucene.codecs.BlockTermState;
@@ -353,6 +351,7 @@ import org.apache.lucene.util.packed.PackedInts;
*/
public final class Lucene50PostingsFormat extends PostingsFormat {
+
/**
* Filename extension for document number, frequencies, and skip data.
* See chapter: <a href="#Frequencies">Frequencies and Skip Data</a>
@@ -370,7 +369,10 @@ public final class Lucene50PostingsFormat extends PostingsFormat {
* See chapter: <a href="#Payloads">Payloads and Offsets</a>
*/
public static final String PAY_EXTENSION = "pay";
-
+
+ /** Attribute key for fst mode. */
+ static final String MODE_KEY = Lucene50PostingsFormat.class.getSimpleName() + ".fstMode";
+
/**
* Expert: The maximum number of skip levels. Smaller values result in
* slightly smaller indexes, but slower skipping in big posting lists.
@@ -389,6 +391,37 @@ public final class Lucene50PostingsFormat extends PostingsFormat {
private final int minTermBlockSize;
private final int maxTermBlockSize;
+ private final FSTLoadMode fstLoadMode;
+
+ /**
+ * An enum that controls whether term index FSTs are loaded into memory or read off-heap
+ */
+ public enum FSTLoadMode {
+ /**
+ * Always read FSTs from disk.
+ * NOTE: If this option is used the FST will be read off-heap even if buffered directory implementations
+ * are used.
+ */
+ OFF_HEAP,
+ /**
+ * Never read FSTs from disk, i.e. all fields' FSTs are loaded into memory
+ */
+ ON_HEAP,
+ /**
+ * Always read FSTs from disk.
+ * An exception is made for ID fields in an IndexWriter context, which are always loaded into memory.
+ * This is useful to guarantee the best update performance even if a non-MMapDirectory is used.
+ * NOTE: If this option is used the FST will be read off-heap even if buffered directory implementations
+ * are used.
+ * See {@link FSTLoadMode#AUTO}
+ */
+ OPTIMIZE_UPDATES_OFF_HEAP,
+ /**
+ * Automatically decides whether FSTs are read from disk, depending on whether the segment is read from an MMapDirectory.
+ * An exception is made for ID fields in an IndexWriter context which are always loaded into memory.
+ */
+ AUTO
+ }
/**
* Fixed packed block size, number of integers encoded in
@@ -400,18 +433,19 @@ public final class Lucene50PostingsFormat extends PostingsFormat {
/** Creates {@code Lucene50PostingsFormat} with default
* settings. */
public Lucene50PostingsFormat() {
- this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
+ this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE, FSTLoadMode.AUTO);
}
/** Creates {@code Lucene50PostingsFormat} with custom
* values for {@code minBlockSize} and {@code
* maxBlockSize} passed to block terms dictionary.
* @see BlockTreeTermsWriter#BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int) */
- public Lucene50PostingsFormat(int minTermBlockSize, int maxTermBlockSize) {
+ public Lucene50PostingsFormat(int minTermBlockSize, int maxTermBlockSize, FSTLoadMode fstLoadMode) {
super("Lucene50");
BlockTreeTermsWriter.validateSettings(minTermBlockSize, maxTermBlockSize);
this.minTermBlockSize = minTermBlockSize;
this.maxTermBlockSize = maxTermBlockSize;
+ this.fstLoadMode = fstLoadMode;
}
@Override
@@ -422,7 +456,7 @@ public final class Lucene50PostingsFormat extends PostingsFormat {
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
PostingsWriterBase postingsWriter = new Lucene50PostingsWriter(state);
-
+ state.segmentInfo.putAttribute(MODE_KEY, fstLoadMode.name());
boolean success = false;
try {
FieldsConsumer ret = new BlockTreeTermsWriter(state,
@@ -441,9 +475,14 @@ public final class Lucene50PostingsFormat extends PostingsFormat {
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
PostingsReaderBase postingsReader = new Lucene50PostingsReader(state);
+ String fstLoadModeKey = state.segmentInfo.getAttribute(MODE_KEY);
+ FSTLoadMode fstLoadMode = FSTLoadMode.AUTO;
+ if (fstLoadModeKey != null) {
+ fstLoadMode = FSTLoadMode.valueOf(fstLoadModeKey);
+ }
boolean success = false;
try {
- FieldsProducer ret = new BlockTreeTermsReader(postingsReader, state);
+ FieldsProducer ret = new BlockTreeTermsReader(postingsReader, state, fstLoadMode);
success = true;
return ret;
} finally {
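
Worth noting from the hunks above: the configured mode is persisted per segment
via the MODE_KEY segment-info attribute at write time and restored in
fieldsProducer (falling back to AUTO when the attribute is absent), so readers
honor the mode the segment was written with. A minimal construction sketch,
using only names that appear in this patch:

    // Postings format that always reads the terms index FST from disk,
    // with the default block-tree term block sizes.
    PostingsFormat pf = new Lucene50PostingsFormat(
        BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE,
        BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE,
        Lucene50PostingsFormat.FSTLoadMode.OFF_HEAP);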
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80Codec.java
index 93e91ea..aa55f5f 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80Codec.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80Codec.java
@@ -30,8 +30,11 @@ import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
+import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.FSTLoadMode;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
@@ -58,6 +61,7 @@ public class Lucene80Codec extends Codec {
private final SegmentInfoFormat segmentInfosFormat = new Lucene70SegmentInfoFormat();
private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat();
private final CompoundFormat compoundFormat = new Lucene50CompoundFormat();
+ private final PostingsFormat defaultFormat;
private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() {
@Override
@@ -79,7 +83,7 @@ public class Lucene80Codec extends Codec {
* Instantiates a new codec.
*/
public Lucene80Codec() {
- this(Mode.BEST_SPEED);
+ this(Mode.BEST_SPEED, FSTLoadMode.AUTO);
}
/**
@@ -88,9 +92,11 @@ public class Lucene80Codec extends Codec {
* @param mode stored fields compression mode to use for newly
* flushed/merged segments.
*/
- public Lucene80Codec(Mode mode) {
+ public Lucene80Codec(Mode mode, FSTLoadMode fstLoadMode) {
super("Lucene80");
this.storedFieldsFormat = new Lucene50StoredFieldsFormat(Objects.requireNonNull(mode));
+ this.defaultFormat = new Lucene50PostingsFormat(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE,
+ BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE, Objects.requireNonNull(fstLoadMode));
}
@Override
@@ -164,7 +170,6 @@ public class Lucene80Codec extends Codec {
return docValuesFormat;
}
- private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene50");
private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene80");
private final NormsFormat normsFormat = new Lucene80NormsFormat();
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat.java
index 9d2d5b8..22b400a 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat.java
@@ -26,6 +26,9 @@ import java.util.List;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompetitiveImpactAccumulator;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.asserting.AssertingCodec;
+import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
import org.apache.lucene.codecs.blocktree.FieldReader;
import org.apache.lucene.codecs.blocktree.Stats;
import org.apache.lucene.codecs.lucene50.Lucene50ScoreSkipReader.MutableImpactList;
@@ -125,6 +128,119 @@ public class TestBlockPostingsFormat extends BasePostingsFormatTestCase {
}
}
}
+
+ public void testDisableFSTOffHeap() throws IOException {
+ Path tempDir = createTempDir();
+ try (Directory d = MMapDirectory.open(tempDir)) {
+ try (IndexWriter w = new IndexWriter(d, new IndexWriterConfig(new MockAnalyzer(random())).setCodec(new AssertingCodec() {
+ @Override
+ public PostingsFormat getPostingsFormatForField(String field) {
+ return new Lucene50PostingsFormat(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE, Lucene50PostingsFormat.FSTLoadMode.ON_HEAP);
+ }
+ }))) {
+ assumeTrue("only works with mmap directory", d instanceof MMapDirectory);
+ DirectoryReader readerFromWriter = DirectoryReader.open(w);
+ for (int i = 0; i < 50; i++) {
+ Document doc = new Document();
+ doc.add(newStringField("id", "" + i, Field.Store.NO));
+ doc.add(newStringField("field", Character.toString((char) (97 + i)), Field.Store.NO));
+ doc.add(newStringField("field", Character.toString((char) (98 + i)), Field.Store.NO));
+ if (rarely()) {
+ w.addDocument(doc);
+ } else {
+ w.updateDocument(new Term("id", "" + i), doc);
+ }
+ if (random().nextBoolean()) {
+ w.commit();
+ }
+ if (random().nextBoolean()) {
+ DirectoryReader newReader = DirectoryReader.openIfChanged(readerFromWriter);
+ if (newReader != null) {
+ readerFromWriter.close();
+ readerFromWriter = newReader;
+ }
+ for (LeafReaderContext leaf : readerFromWriter.leaves()) {
+ FieldReader field = (FieldReader) leaf.reader().terms("field");
+ FieldReader id = (FieldReader) leaf.reader().terms("id");
+ assertFalse(id.isFstOffHeap());
+ assertFalse(field.isFstOffHeap());
+ }
+ }
+ }
+ readerFromWriter.close();
+ w.forceMerge(1);
+ w.commit();
+ }
+ try (DirectoryReader r = DirectoryReader.open(d)) {
+ assertEquals(1, r.leaves().size());
+ FieldReader field = (FieldReader) r.leaves().get(0).reader().terms("field");
+ FieldReader id = (FieldReader) r.leaves().get(0).reader().terms("id");
+ assertFalse(id.isFstOffHeap());
+ assertFalse(field.isFstOffHeap());
+ }
+ }
+ }
+
+ public void testAlwaysFSTOffHeap() throws IOException {
+ boolean alsoLoadIdOffHeap = random().nextBoolean();
+ Lucene50PostingsFormat.FSTLoadMode loadMode;
+ if (alsoLoadIdOffHeap) {
+ loadMode = Lucene50PostingsFormat.FSTLoadMode.OFF_HEAP;
+ } else {
+ loadMode = Lucene50PostingsFormat.FSTLoadMode.OPTIMIZE_UPDATES_OFF_HEAP;
+ }
+ try (Directory d = newDirectory()) { // any directory should work now
+ try (IndexWriter w = new IndexWriter(d, new IndexWriterConfig(new MockAnalyzer(random())).setCodec(new AssertingCodec() {
+ @Override
+ public PostingsFormat getPostingsFormatForField(String field) {
+ return new Lucene50PostingsFormat(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE, loadMode);
+ }
+ }))) {
+ DirectoryReader readerFromWriter = DirectoryReader.open(w);
+ for (int i = 0; i < 50; i++) {
+ Document doc = new Document();
+ doc.add(newStringField("id", "" + i, Field.Store.NO));
+ doc.add(newStringField("field", Character.toString((char) (97 + i)), Field.Store.NO));
+ doc.add(newStringField("field", Character.toString((char) (98 + i)), Field.Store.NO));
+ if (rarely()) {
+ w.addDocument(doc);
+ } else {
+ w.updateDocument(new Term("id", "" + i), doc);
+ }
+ if (random().nextBoolean()) {
+ w.commit();
+ }
+ if (random().nextBoolean()) {
+ DirectoryReader newReader = DirectoryReader.openIfChanged(readerFromWriter);
+ if (newReader != null) {
+ readerFromWriter.close();
+ readerFromWriter = newReader;
+ }
+ for (LeafReaderContext leaf : readerFromWriter.leaves()) {
+ FieldReader field = (FieldReader) leaf.reader().terms("field");
+ FieldReader id = (FieldReader) leaf.reader().terms("id");
+ if (alsoLoadIdOffHeap) {
+ assertTrue(id.isFstOffHeap());
+ } else {
+ assertFalse(id.isFstOffHeap());
+ }
+ assertTrue(field.isFstOffHeap());
+ }
+ }
+ }
+ readerFromWriter.close();
+ w.forceMerge(1);
+ w.commit();
+ }
+ try (DirectoryReader r = DirectoryReader.open(d)) {
+ assertEquals(1, r.leaves().size());
+ FieldReader field = (FieldReader) r.leaves().get(0).reader().terms("field");
+ FieldReader id = (FieldReader) r.leaves().get(0).reader().terms("id");
+ assertTrue(id.isFstOffHeap());
+ assertTrue(field.isFstOffHeap());
+ }
+ }
+ }
/** Make sure the final sub-block(s) are not skipped. */
public void testFinalBlock() throws Exception {
@@ -152,7 +268,7 @@ public class TestBlockPostingsFormat extends BasePostingsFormatTestCase {
private void shouldFail(int minItemsInBlock, int maxItemsInBlock) {
expectThrows(IllegalArgumentException.class, () -> {
- new Lucene50PostingsFormat(minItemsInBlock, maxItemsInBlock);
+ new Lucene50PostingsFormat(minItemsInBlock, maxItemsInBlock, Lucene50PostingsFormat.FSTLoadMode.AUTO);
});
}
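
As the tests above show, whether a field's terms index FST actually ended up
off heap can be checked through FieldReader#isFstOffHeap(); a small sketch of
that check (the directory variable and the "field" field name are placeholders,
and the cast assumes the default block-tree terms dictionary is in use):

    try (DirectoryReader r = DirectoryReader.open(dir)) {
      for (LeafReaderContext leaf : r.leaves()) {
        FieldReader terms = (FieldReader) leaf.reader().terms("field");
        System.out.println(leaf.reader() + " off-heap FST: " + terms.isFstOffHeap());
      }
    }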
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java
index 48864e3..d5c0ecb 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java
@@ -33,7 +33,7 @@ import com.carrotsearch.randomizedtesting.generators.RandomPicks;
public class TestLucene50StoredFieldsFormatHighCompression extends BaseStoredFieldsFormatTestCase {
@Override
protected Codec getCodec() {
- return new Lucene80Codec(Mode.BEST_COMPRESSION);
+ return new Lucene80Codec(Mode.BEST_COMPRESSION, Lucene50PostingsFormat.FSTLoadMode.AUTO);
}
/**
@@ -44,7 +44,7 @@ public class TestLucene50StoredFieldsFormatHighCompression extends BaseStoredFie
Directory dir = newDirectory();
for (int i = 0; i < 10; i++) {
IndexWriterConfig iwc = newIndexWriterConfig();
- iwc.setCodec(new Lucene80Codec(RandomPicks.randomFrom(random(), Mode.values())));
+ iwc.setCodec(new Lucene80Codec(RandomPicks.randomFrom(random(), Mode.values()), Lucene50PostingsFormat.FSTLoadMode.AUTO));
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig());
Document doc = new Document();
doc.add(new StoredField("field1", "value1"));
@@ -69,9 +69,13 @@ public class TestLucene50StoredFieldsFormatHighCompression extends BaseStoredFie
dir.close();
}
- public void testInvalidOptions() throws Exception {
+ public void testInvalidOptions() {
expectThrows(NullPointerException.class, () -> {
- new Lucene80Codec(null);
+ new Lucene80Codec(null, Lucene50PostingsFormat.FSTLoadMode.AUTO);
+ });
+
+ expectThrows(NullPointerException.class, () -> {
+ new Lucene80Codec(Mode.BEST_COMPRESSION, null);
});
expectThrows(NullPointerException.class, () -> {
diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardCodec.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardCodec.java
index f34aa82..a2a84d2 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardCodec.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardCodec.java
@@ -18,6 +18,7 @@ package org.apache.lucene.codecs.cheapbastard;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
import org.apache.lucene.util.TestUtil;
/** Codec that tries to use as little ram as possible because he spent all his money on beer */
@@ -26,7 +27,7 @@ import org.apache.lucene.util.TestUtil;
public class CheapBastardCodec extends FilterCodec {
// TODO: would be better to have no terms index at all and bsearch a terms dict
- private final PostingsFormat postings = TestUtil.getDefaultPostingsFormat(100, 200);
+ private final PostingsFormat postings = TestUtil.getDefaultPostingsFormat(100, 200, Lucene50PostingsFormat.FSTLoadMode.OFF_HEAP);
public CheapBastardCodec() {
super("CheapBastard", TestUtil.getDefaultCodec());
diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
index 6f57a2e..5bf9ed4 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
@@ -38,6 +38,7 @@ import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
import org.apache.lucene.codecs.blocktreeords.OrdsBlockTreeTermsReader;
import org.apache.lucene.codecs.blocktreeords.OrdsBlockTreeTermsWriter;
+import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsReader;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsWriter;
import org.apache.lucene.codecs.memory.FSTOrdTermsReader;
@@ -315,7 +316,7 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
boolean success = false;
try {
- fields = new BlockTreeTermsReader(postingsReader, state);
+ fields = new BlockTreeTermsReader(postingsReader, state, Lucene50PostingsFormat.FSTLoadMode.AUTO);
success = true;
} finally {
if (!success) {
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
index ec3c323..87126ca 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
@@ -26,6 +26,7 @@ import java.util.Map;
import java.util.Random;
import java.util.Set;
+import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsReader;
@@ -40,6 +41,7 @@ import org.apache.lucene.codecs.blockterms.LuceneVarGapDocFreqInterval;
import org.apache.lucene.codecs.blockterms.LuceneVarGapFixedInterval;
import org.apache.lucene.codecs.blocktreeords.BlockTreeOrdsPostingsFormat;
import org.apache.lucene.codecs.bloom.TestBloomFilteredLucenePostings;
+import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
import org.apache.lucene.codecs.lucene60.Lucene60PointsReader;
import org.apache.lucene.codecs.lucene60.Lucene60PointsWriter;
import org.apache.lucene.codecs.memory.DirectDocValuesFormat;
@@ -187,7 +189,8 @@ public class RandomCodec extends AssertingCodec {
bkdSplitRandomSeed = random.nextInt();
add(avoidCodecs,
- TestUtil.getDefaultPostingsFormat(minItemsPerBlock, maxItemsPerBlock),
+ TestUtil.getDefaultPostingsFormat(minItemsPerBlock, maxItemsPerBlock,
+ RandomPicks.randomFrom(random, Lucene50PostingsFormat.FSTLoadMode.values())),
new FSTPostingsFormat(),
new FSTOrdPostingsFormat(),
new DirectPostingsFormat(LuceneTestCase.rarely(random) ? 1 : (LuceneTestCase.rarely(random) ? Integer.MAX_VALUE : maxItemsPerBlock),
diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
index 87b8f97..87d3776 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
@@ -42,6 +42,7 @@ import org.apache.lucene.codecs.asserting.AssertingDocValuesFormat;
import org.apache.lucene.codecs.asserting.AssertingPostingsFormat;
import org.apache.lucene.codecs.cheapbastard.CheapBastardCodec;
import org.apache.lucene.codecs.compressing.CompressingCodec;
+import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
import org.apache.lucene.codecs.lucene80.Lucene80Codec;
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
@@ -189,7 +190,8 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule {
} else if ("Compressing".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 6 && !shouldAvoidCodec("Compressing"))) {
codec = CompressingCodec.randomInstance(random);
} else if ("Lucene80".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 5 && !shouldAvoidCodec("Lucene80"))) {
- codec = new Lucene80Codec(RandomPicks.randomFrom(random, Lucene50StoredFieldsFormat.Mode.values()));
+ codec = new Lucene80Codec(RandomPicks.randomFrom(random, Lucene50StoredFieldsFormat.Mode.values()),
+ RandomPicks.randomFrom(random, Lucene50PostingsFormat.FSTLoadMode.values()));
} else if (!"random".equals(TEST_CODEC)) {
codec = Codec.forName(TEST_CODEC);
} else if ("random".equals(TEST_POSTINGSFORMAT)) {
diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
index 5350890..280a074 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
@@ -911,8 +911,8 @@ public final class TestUtil {
* Returns the actual default postings format (e.g. LuceneMNPostingsFormat for this version of Lucene.
* @lucene.internal this may disappear at any time
*/
- public static PostingsFormat getDefaultPostingsFormat(int minItemsPerBlock, int maxItemsPerBlock) {
- return new Lucene50PostingsFormat(minItemsPerBlock, maxItemsPerBlock);
+ public static PostingsFormat getDefaultPostingsFormat(int minItemsPerBlock, int maxItemsPerBlock, Lucene50PostingsFormat.FSTLoadMode offHeapFST) {
+ return new Lucene50PostingsFormat(minItemsPerBlock, maxItemsPerBlock, offHeapFST);
}
/** Returns a random postings format that supports term ordinals */
diff --git a/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java b/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java
index 5adc161..487d06e 100644
--- a/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java
+++ b/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java
@@ -23,6 +23,7 @@ import java.util.Locale;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
import org.apache.lucene.codecs.lucene80.Lucene80Codec;
import org.apache.solr.common.SolrException.ErrorCode;
@@ -91,7 +92,7 @@ public class SchemaCodecFactory extends CodecFactory implements SolrCoreAware {
compressionMode = SOLR_DEFAULT_COMPRESSION_MODE;
log.debug("Using default compressionMode: " + compressionMode);
}
- codec = new Lucene80Codec(compressionMode) {
+ codec = new Lucene80Codec(compressionMode, Lucene50PostingsFormat.FSTLoadMode.AUTO) {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
final SchemaField schemaField = core.getLatestSchema().getFieldOrNull(field);