You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2020/08/26 09:04:48 UTC
[lucene-solr] branch master updated: LUCENE-9447: Make BEST_COMPRESSION better with highly compressible data. (#1762)
This is an automated email from the ASF dual-hosted git repository.
jpountz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new 913976d LUCENE-9447: Make BEST_COMPRESSION better with highly compressible data. (#1762)
913976d is described below
commit 913976dbf78b3a6d937b3345e6231fee77e81fd4
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Wed Aug 26 11:04:34 2020 +0200
LUCENE-9447: Make BEST_COMPRESSION better with highly compressible data. (#1762)
This makes BEST_COMPRESSION split blocks into sub blocks and use preset
dictionaries to improve compression ratios.
---
lucene/CHANGES.txt | 3 +
.../lucene50/Lucene50StoredFieldsFormat.java | 11 +-
.../lucene/codecs/lucene84/Lucene84Codec.java | 2 +-
.../lucene/codecs/lucene86/Lucene86Codec.java | 2 +-
.../org/apache/lucene/codecs/lucene86/package.html | 25 ++
.../services/org.apache.lucene.codecs.Codec | 1 +
.../lucene50/Lucene50RWStoredFieldsFormat.java | 51 +++
.../lucene50/TestLucene50StoredFieldsFormat.java | 4 +-
...tLucene50StoredFieldsFormatHighCompression.java | 8 +-
...estLucene50StoredFieldsFormatMergeInstance.java | 0
.../lucene70/TestLucene70SegmentInfoFormat.java | 10 +-
.../lucene/codecs/lucene84/Lucene84RWCodec.java | 7 +
.../Lucene86RWCodec.java} | 30 +-
.../benchmark/byTask/tasks/CreateIndexTask.java | 4 +-
.../src/java/org/apache/lucene/codecs/Codec.java | 2 +-
.../codecs/lucene50/Lucene50TermVectorsFormat.java | 3 +-
.../lucene/codecs/lucene86/package-info.java | 6 +-
.../Lucene87Codec.java} | 21 +-
.../lucene87/Lucene87StoredFieldsFormat.java | 378 +++++++++++++++++++++
.../{lucene86 => lucene87}/package-info.java | 10 +-
.../services/org.apache.lucene.codecs.Codec | 2 +-
.../codecs/lucene80/TestLucene80NormsFormat.java | 4 +-
.../codecs/lucene86/TestLucene86PointsFormat.java | 2 +-
.../TestLucene87StoredFieldsFormat.java} | 5 +-
...Lucene87StoredFieldsFormatHighCompression.java} | 18 +-
...stLucene87StoredFieldsFormatMergeInstance.java} | 4 +-
.../org/apache/lucene/index/TestPointValues.java | 4 +-
.../org/apache/lucene/search/TestBoolean2.java | 9 +-
.../document/TestFloatPointNearestNeighbor.java | 3 +-
.../test/org/apache/lucene/search/TestNearest.java | 3 +-
.../search/suggest/document/TestSuggestField.java | 4 +-
.../codecs/compressing/CompressingCodec.java | 4 +-
.../DeflateWithPresetCompressingCodec.java} | 27 +-
.../apache/lucene/geo/BaseGeoPointTestCase.java | 4 +-
.../org/apache/lucene/geo/BaseXYPointTestCase.java | 4 +-
.../util/TestRuleSetupAndRestoreClassEnv.java | 8 +-
.../src/java/org/apache/lucene/util/TestUtil.java | 4 +-
.../services/org.apache.lucene.codecs.Codec | 1 +
.../org/apache/solr/core/SchemaCodecFactory.java | 6 +-
39 files changed, 578 insertions(+), 116 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index e2c646c..0e16e67 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -198,6 +198,9 @@ Optimizations
* LUCENE-9395: ConstantValuesSource now shares a single DoubleValues
instance across all segments (Tony Xu)
+* LUCENE-9447: BEST_COMPRESSION now provides higher compression ratios on highly
+ compressible data. (Adrien Grand)
+
* LUCENE-9373: FunctionMatchQuery now accepts a "matchCost" optimization hint.
(Maxim Glazkov, David Smiley)
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50StoredFieldsFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50StoredFieldsFormat.java
similarity index 91%
rename from lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50StoredFieldsFormat.java
rename to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50StoredFieldsFormat.java
index 035fbd9..6f3b162 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50StoredFieldsFormat.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene50/Lucene50StoredFieldsFormat.java
@@ -100,7 +100,7 @@ import org.apache.lucene.util.packed.DirectMonotonicWriter;
* larger than (<code>2<sup>31</sup> - 2<sup>14</sup></code>) bytes.
* @lucene.experimental
*/
-public final class Lucene50StoredFieldsFormat extends StoredFieldsFormat {
+public class Lucene50StoredFieldsFormat extends StoredFieldsFormat {
/** Configuration option for stored fields. */
public static enum Mode {
@@ -126,7 +126,7 @@ public final class Lucene50StoredFieldsFormat extends StoredFieldsFormat {
}
@Override
- public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
+ public final StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
String value = si.getAttribute(MODE_KEY);
if (value == null) {
throw new IllegalStateException("missing value for " + MODE_KEY + " for segment: " + si.name);
@@ -137,12 +137,7 @@ public final class Lucene50StoredFieldsFormat extends StoredFieldsFormat {
@Override
public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException {
- String previous = si.putAttribute(MODE_KEY, mode.name());
- if (previous != null && previous.equals(mode.name()) == false) {
- throw new IllegalStateException("found existing value for " + MODE_KEY + " for segment: " + si.name +
- "old=" + previous + ", new=" + mode.name());
- }
- return impl(mode).fieldsWriter(directory, si, context);
+ throw new UnsupportedOperationException("Old codecs may only be used for reading");
}
StoredFieldsFormat impl(Mode mode) {
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene84/Lucene84Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene84/Lucene84Codec.java
index bef5633..90918c1 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene84/Lucene84Codec.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene84/Lucene84Codec.java
@@ -97,7 +97,7 @@ public class Lucene84Codec extends Codec {
}
@Override
- public final StoredFieldsFormat storedFieldsFormat() {
+ public StoredFieldsFormat storedFieldsFormat() {
return storedFieldsFormat;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene86/Lucene86Codec.java
similarity index 99%
copy from lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86Codec.java
copy to lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene86/Lucene86Codec.java
index 3f69874..e297465 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86Codec.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene86/Lucene86Codec.java
@@ -97,7 +97,7 @@ public class Lucene86Codec extends Codec {
}
@Override
- public final StoredFieldsFormat storedFieldsFormat() {
+ public StoredFieldsFormat storedFieldsFormat() {
return storedFieldsFormat;
}
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene86/package.html b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene86/package.html
new file mode 100644
index 0000000..10560c6
--- /dev/null
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene86/package.html
@@ -0,0 +1,25 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+</head>
+<body>
+Lucene 8.6 file format.
+</body>
+</html>
diff --git a/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec b/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
index cf7a945..d673233 100644
--- a/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
+++ b/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
@@ -15,3 +15,4 @@
org.apache.lucene.codecs.lucene80.Lucene80Codec
org.apache.lucene.codecs.lucene84.Lucene84Codec
+org.apache.lucene.codecs.lucene86.Lucene86Codec
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWStoredFieldsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWStoredFieldsFormat.java
new file mode 100644
index 0000000..82d1c96
--- /dev/null
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/Lucene50RWStoredFieldsFormat.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene50;
+
+import java.io.IOException;
+
+import org.apache.lucene.codecs.StoredFieldsWriter;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+
+/**
+ * Read-write variant of Lucene50StoredFieldsFormat for tests: the parent's fieldsWriter now throws UnsupportedOperationException, so this subclass restores writing.
+ */
+public final class Lucene50RWStoredFieldsFormat extends Lucene50StoredFieldsFormat {
+
+  /** Creates the format through the superclass no-argument constructor. */
+  public Lucene50RWStoredFieldsFormat() {
+    super();
+  }
+
+  /** Creates the format with the given stored fields {@code mode}, which is handed to the superclass. */
+  public Lucene50RWStoredFieldsFormat(Lucene50StoredFieldsFormat.Mode mode) {
+    super(mode);
+  }
+
+  /** Stores the mode name under MODE_KEY on {@code si}, throws IllegalStateException if putAttribute returned a different name, then delegates to {@code impl(mode)}. */
+  @Override
+  public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException {
+    String previous = si.putAttribute(MODE_KEY, mode.name());
+    if (previous != null && previous.equals(mode.name()) == false) {
+      throw new IllegalStateException("found existing value for " + MODE_KEY + " for segment: " + si.name +
+                                      ", old=" + previous + ", new=" + mode.name());
+    }
+    return impl(mode).fieldsWriter(directory, si, context);
+  }
+}
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormat.java
similarity index 92%
copy from lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormat.java
copy to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormat.java
index 4c7bed4..fec9e43 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormat.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormat.java
@@ -18,12 +18,12 @@ package org.apache.lucene.codecs.lucene50;
import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.lucene86.Lucene86RWCodec;
import org.apache.lucene.index.BaseStoredFieldsFormatTestCase;
-import org.apache.lucene.util.TestUtil;
public class TestLucene50StoredFieldsFormat extends BaseStoredFieldsFormatTestCase {
@Override
protected Codec getCodec() {
- return TestUtil.getDefaultCodec();
+ return new Lucene86RWCodec();
}
}
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java
similarity index 91%
copy from lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java
copy to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java
index cccee73..41b4b84 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java
@@ -20,7 +20,7 @@ package org.apache.lucene.codecs.lucene50;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
-import org.apache.lucene.codecs.lucene86.Lucene86Codec;
+import org.apache.lucene.codecs.lucene86.Lucene86RWCodec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.BaseStoredFieldsFormatTestCase;
@@ -32,7 +32,7 @@ import org.apache.lucene.store.Directory;
public class TestLucene50StoredFieldsFormatHighCompression extends BaseStoredFieldsFormatTestCase {
@Override
protected Codec getCodec() {
- return new Lucene86Codec(Mode.BEST_COMPRESSION);
+ return new Lucene86RWCodec(Mode.BEST_COMPRESSION);
}
/**
@@ -43,7 +43,7 @@ public class TestLucene50StoredFieldsFormatHighCompression extends BaseStoredFie
Directory dir = newDirectory();
for (int i = 0; i < 10; i++) {
IndexWriterConfig iwc = newIndexWriterConfig();
- iwc.setCodec(new Lucene86Codec(RandomPicks.randomFrom(random(), Mode.values())));
+ iwc.setCodec(new Lucene86RWCodec(RandomPicks.randomFrom(random(), Mode.values())));
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig());
Document doc = new Document();
doc.add(new StoredField("field1", "value1"));
@@ -70,7 +70,7 @@ public class TestLucene50StoredFieldsFormatHighCompression extends BaseStoredFie
public void testInvalidOptions() {
expectThrows(NullPointerException.class, () -> {
- new Lucene86Codec(null);
+ new Lucene86RWCodec(null);
});
expectThrows(NullPointerException.class, () -> {
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatMergeInstance.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatMergeInstance.java
similarity index 100%
copy from lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatMergeInstance.java
copy to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatMergeInstance.java
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene70/TestLucene70SegmentInfoFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene70/TestLucene70SegmentInfoFormat.java
index ac516a1..d9dd019 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene70/TestLucene70SegmentInfoFormat.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene70/TestLucene70SegmentInfoFormat.java
@@ -18,8 +18,7 @@
package org.apache.lucene.codecs.lucene70;
import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.FilterCodec;
-import org.apache.lucene.codecs.SegmentInfoFormat;
+import org.apache.lucene.codecs.lucene84.Lucene84RWCodec;
import org.apache.lucene.index.BaseSegmentInfoFormatTestCase;
import org.apache.lucene.util.Version;
@@ -32,11 +31,6 @@ public class TestLucene70SegmentInfoFormat extends BaseSegmentInfoFormatTestCase
@Override
protected Codec getCodec() {
- return new FilterCodec("Lucene84", Codec.forName("Lucene84")) {
- @Override
- public SegmentInfoFormat segmentInfoFormat() {
- return new Lucene70RWSegmentInfoFormat();
- }
- };
+ return new Lucene84RWCodec();
}
}
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene84/Lucene84RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene84/Lucene84RWCodec.java
index c1fd467..0f74e79 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene84/Lucene84RWCodec.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene84/Lucene84RWCodec.java
@@ -18,6 +18,8 @@ package org.apache.lucene.codecs.lucene84;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
+import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50RWStoredFieldsFormat;
import org.apache.lucene.codecs.lucene60.Lucene60RWPointsFormat;
import org.apache.lucene.codecs.lucene70.Lucene70RWSegmentInfoFormat;
@@ -36,4 +38,9 @@ public class Lucene84RWCodec extends Lucene84Codec {
return new Lucene70RWSegmentInfoFormat();
}
+ @Override
+ public StoredFieldsFormat storedFieldsFormat() {
+ return new Lucene50RWStoredFieldsFormat();
+ }
+
}
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene84/Lucene84RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene86/Lucene86RWCodec.java
similarity index 52%
copy from lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene84/Lucene84RWCodec.java
copy to lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene86/Lucene86RWCodec.java
index c1fd467..72e2bee 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene84/Lucene84RWCodec.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene86/Lucene86RWCodec.java
@@ -14,26 +14,32 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene84;
+package org.apache.lucene.codecs.lucene86;
-import org.apache.lucene.codecs.PointsFormat;
-import org.apache.lucene.codecs.SegmentInfoFormat;
-import org.apache.lucene.codecs.lucene60.Lucene60RWPointsFormat;
-import org.apache.lucene.codecs.lucene70.Lucene70RWSegmentInfoFormat;
+import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50RWStoredFieldsFormat;
+import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
/**
- * RW impersonation of {@link Lucene84Codec}.
+ * RW impersonation of {@link Lucene86Codec}.
*/
-public class Lucene84RWCodec extends Lucene84Codec {
+public class Lucene86RWCodec extends Lucene86Codec {
- @Override
- public PointsFormat pointsFormat() {
- return new Lucene60RWPointsFormat();
+ private final StoredFieldsFormat storedFieldsFormat;
+
+ /** No-argument constructor. */
+ public Lucene86RWCodec() {
+ storedFieldsFormat = new Lucene50RWStoredFieldsFormat();
+ }
+
+ /** Constructor that takes a mode. */
+ public Lucene86RWCodec(Lucene50StoredFieldsFormat.Mode mode) {
+ storedFieldsFormat = new Lucene50RWStoredFieldsFormat(mode);
}
@Override
- public SegmentInfoFormat segmentInfoFormat() {
- return new Lucene70RWSegmentInfoFormat();
+ public StoredFieldsFormat storedFieldsFormat() {
+ return storedFieldsFormat;
}
}
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
index db64781..e44b046 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
@@ -29,7 +29,7 @@ import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.lucene86.Lucene86Codec;
+import org.apache.lucene.codecs.lucene87.Lucene87Codec;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;
@@ -138,7 +138,7 @@ public class CreateIndexTask extends PerfTask {
if (defaultCodec == null && postingsFormat != null) {
try {
final PostingsFormat postingsFormatChosen = PostingsFormat.forName(postingsFormat);
- iwConf.setCodec(new Lucene86Codec() {
+ iwConf.setCodec(new Lucene87Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return postingsFormatChosen;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
index 8b5ca14..14fa793 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
@@ -57,7 +57,7 @@ public abstract class Codec implements NamedSPILoader.NamedSPI {
}
// TODO: should we use this, or maybe a system property is better?
- static Codec defaultCodec = LOADER.lookup("Lucene86");
+ static Codec defaultCodec = LOADER.lookup("Lucene87");
}
private final String name;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50TermVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50TermVectorsFormat.java
index 00412d5..9b65fb4 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50TermVectorsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50TermVectorsFormat.java
@@ -20,6 +20,7 @@ package org.apache.lucene.codecs.lucene50;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.compressing.FieldsIndexWriter;
+import org.apache.lucene.codecs.lucene87.Lucene87StoredFieldsFormat;
import org.apache.lucene.codecs.compressing.CompressingTermVectorsFormat;
import org.apache.lucene.codecs.compressing.CompressionMode;
import org.apache.lucene.store.DataOutput;
@@ -29,7 +30,7 @@ import org.apache.lucene.util.packed.PackedInts;
/**
* Lucene 5.0 {@link TermVectorsFormat term vectors format}.
* <p>
- * Very similarly to {@link Lucene50StoredFieldsFormat}, this format is based
+ * Very similarly to {@link Lucene87StoredFieldsFormat}, this format is based
* on compressed chunks of data, with document-level granularity so that a
* document can never span across distinct chunks. Moreover, data is made as
* compact as possible:<ul>
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/package-info.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene86/package-info.java
index 19be7eb..13f35a1 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/package-info.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene86/package-info.java
@@ -137,7 +137,7 @@
* This contains the set of field names used in the index.
* </li>
* <li>
- * {@link org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat Stored Field values}.
+ * Stored Field values.
* This contains, for each document, a list of attribute-value pairs, where the attributes
* are field names. These are used to store auxiliary information about the document, such as
* its title, url, or an identifier to access a database. The set of stored fields are what is
@@ -250,12 +250,12 @@
* <td>Stores information about the fields</td>
* </tr>
* <tr>
- * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat Field Index}</td>
+ * <td>Field Index</td>
* <td>.fdx</td>
* <td>Contains pointers to field data</td>
* </tr>
* <tr>
- * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat Field Data}</td>
+ * <td>Field Data</td>
* <td>.fdt</td>
* <td>The stored fields for documents</td>
* </tr>
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene87/Lucene87Codec.java
similarity index 90%
rename from lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86Codec.java
rename to lucene/core/src/java/org/apache/lucene/codecs/lucene87/Lucene87Codec.java
index 3f69874..5ff4073 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86Codec.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene87/Lucene87Codec.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene86;
+package org.apache.lucene.codecs.lucene87;
import java.util.Objects;
@@ -33,11 +33,12 @@ import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
import org.apache.lucene.codecs.lucene60.Lucene60FieldInfosFormat;
import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat;
import org.apache.lucene.codecs.lucene84.Lucene84PostingsFormat;
+import org.apache.lucene.codecs.lucene86.Lucene86PointsFormat;
+import org.apache.lucene.codecs.lucene86.Lucene86SegmentInfoFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
@@ -52,7 +53,7 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
*
* @lucene.experimental
*/
-public class Lucene86Codec extends Codec {
+public class Lucene87Codec extends Codec {
private final TermVectorsFormat vectorsFormat = new Lucene50TermVectorsFormat();
private final FieldInfosFormat fieldInfosFormat = new Lucene60FieldInfosFormat();
private final SegmentInfoFormat segmentInfosFormat = new Lucene86SegmentInfoFormat();
@@ -64,14 +65,14 @@ public class Lucene86Codec extends Codec {
private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
- return Lucene86Codec.this.getPostingsFormatForField(field);
+ return Lucene87Codec.this.getPostingsFormatForField(field);
}
};
private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
- return Lucene86Codec.this.getDocValuesFormatForField(field);
+ return Lucene87Codec.this.getDocValuesFormatForField(field);
}
};
@@ -80,8 +81,8 @@ public class Lucene86Codec extends Codec {
/**
* Instantiates a new codec.
*/
- public Lucene86Codec() {
- this(Lucene50StoredFieldsFormat.Mode.BEST_SPEED);
+ public Lucene87Codec() {
+ this(Lucene87StoredFieldsFormat.Mode.BEST_SPEED);
}
/**
@@ -90,9 +91,9 @@ public class Lucene86Codec extends Codec {
* @param mode stored fields compression mode to use for newly
* flushed/merged segments.
*/
- public Lucene86Codec(Lucene50StoredFieldsFormat.Mode mode) {
- super("Lucene86");
- this.storedFieldsFormat = new Lucene50StoredFieldsFormat(Objects.requireNonNull(mode));
+ public Lucene87Codec(Lucene87StoredFieldsFormat.Mode mode) {
+ super("Lucene87");
+ this.storedFieldsFormat = new Lucene87StoredFieldsFormat(Objects.requireNonNull(mode));
this.defaultFormat = new Lucene84PostingsFormat();
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene87/Lucene87StoredFieldsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene87/Lucene87StoredFieldsFormat.java
new file mode 100644
index 0000000..c2bbced
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene87/Lucene87StoredFieldsFormat.java
@@ -0,0 +1,378 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene87;
+
+import java.io.IOException;
+import java.util.Objects;
+import java.util.zip.DataFormatException;
+import java.util.zip.Deflater;
+import java.util.zip.Inflater;
+
+import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.StoredFieldsReader;
+import org.apache.lucene.codecs.StoredFieldsWriter;
+import org.apache.lucene.codecs.compressing.CompressingStoredFieldsFormat;
+import org.apache.lucene.codecs.compressing.CompressionMode;
+import org.apache.lucene.codecs.compressing.Compressor;
+import org.apache.lucene.codecs.compressing.Decompressor;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.StoredFieldVisitor;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.packed.DirectMonotonicWriter;
+
+/**
+ * Lucene 8.7 stored fields format.
+ *
+ * <p><b>Principle</b>
+ * <p>This {@link StoredFieldsFormat} compresses blocks of documents in
+ * order to improve the compression ratio compared to document-level
+ * compression. It uses the <a href="http://code.google.com/p/lz4/">LZ4</a>
+ * compression algorithm by default in 16KB blocks, which is fast to compress
+ * and very fast to decompress data. Although the default compression method
+ * that is used ({@link Mode#BEST_SPEED BEST_SPEED}) focuses more on speed than on
+ * compression ratio, it should provide interesting compression ratios
+ * for redundant inputs (such as log files, HTML or plain text). For higher
+ * compression, you can choose ({@link Mode#BEST_COMPRESSION BEST_COMPRESSION}),
+ * which uses the <a href="http://en.wikipedia.org/wiki/DEFLATE">DEFLATE</a>
+ * algorithm with 48kB blocks and shared dictionaries for a better ratio at the
+ * expense of slower performance. These two options can be configured like this:
+ * <pre class="prettyprint">
+ * // the default: for high performance
+ * indexWriterConfig.setCodec(new Lucene87Codec(Mode.BEST_SPEED));
+ * // instead for higher compression (but slower):
+ * // indexWriterConfig.setCodec(new Lucene87Codec(Mode.BEST_COMPRESSION));
+ * </pre>
+ * <p><b>File formats</b>
+ * <p>Stored fields are represented by three files:
+ * <ol>
+ * <li><a id="field_data"></a>
+ * <p>A fields data file (extension <code>.fdt</code>). This file stores a compact
+ * representation of documents in compressed blocks of 16KB or more. When
+ * writing a segment, documents are appended to an in-memory <code>byte[]</code>
+ * buffer. When its size reaches 16KB or more, some metadata about the documents
+ * is flushed to disk, immediately followed by a compressed representation of
+ * the buffer using the
+ * <a href="https://github.com/lz4/lz4">LZ4</a>
+ * <a href="http://fastcompression.blogspot.fr/2011/05/lz4-explained.html">compression format</a>.</p>
+ * <p>Notes
+ * <ul>
+ * <li>When at least one document in a chunk is large enough so that the chunk
+ * is larger than 32KB, the chunk will actually be compressed in several LZ4
+ * blocks of 16KB. This allows {@link StoredFieldVisitor}s which are only
+ * interested in the first fields of a document to not have to decompress 10MB
+ * of data if the document is 10MB, but only 16KB.</li>
+ * <li>Given that the original lengths are written in the metadata of the chunk,
+ * the decompressor can leverage this information to stop decoding as soon as
+ * enough data has been decompressed.</li>
+ * <li>In case documents are incompressible, the overhead of the compression format
+ * is less than 0.5%.</li>
+ * </ul>
+ * </li>
+ * <li><a id="field_index"></a>
+ * <p>A fields index file (extension <code>.fdx</code>). This file stores two
+ * {@link DirectMonotonicWriter monotonic arrays}, one for the first doc IDs of
+ * each block of compressed documents, and another one for the corresponding
+ * offsets on disk. At search time, the array containing doc IDs is
+ * binary-searched in order to find the block that contains the expected doc ID,
+ * and the associated offset on disk is retrieved from the second array.</p></li>
+ * <li><a id="field_meta"></a>
+ * <p>A fields meta file (extension <code>.fdm</code>). This file stores metadata
+ * about the monotonic arrays stored in the index file.</p>
+ * </li>
+ * </ol>
+ * <p><b>Known limitations</b>
+ * <p>This {@link StoredFieldsFormat} does not support individual documents
+ * larger than (<code>2<sup>31</sup> - 2<sup>14</sup></code>) bytes.
+ * @lucene.experimental
+ */
+public class Lucene87StoredFieldsFormat extends StoredFieldsFormat {
+
+ /** Configuration option for stored fields. */
+ public static enum Mode {
+ /** Trade compression ratio for retrieval speed. */
+ BEST_SPEED,
+ /** Trade retrieval speed for compression ratio. */
+ BEST_COMPRESSION
+ }
+
+ /** Attribute key for compression mode. */
+ public static final String MODE_KEY = Lucene87StoredFieldsFormat.class.getSimpleName() + ".mode";
+
+ final Mode mode;
+
+ /** Stored fields format with default options */
+ public Lucene87StoredFieldsFormat() {
+ this(Mode.BEST_SPEED);
+ }
+
+ /** Stored fields format with specified mode */
+ public Lucene87StoredFieldsFormat(Mode mode) {
+ this.mode = Objects.requireNonNull(mode);
+ }
+
+ @Override
+ public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
+ String value = si.getAttribute(MODE_KEY);
+ if (value == null) {
+ throw new IllegalStateException("missing value for " + MODE_KEY + " for segment: " + si.name);
+ }
+ Mode mode = Mode.valueOf(value);
+ return impl(mode).fieldsReader(directory, si, fn, context);
+ }
+
+ @Override
+ public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException {
+ // Record the mode on the segment; fieldsReader looks up MODE_KEY to pick the matching implementation.
+ String previous = si.putAttribute(MODE_KEY, mode.name());
+ if (previous != null && previous.equals(mode.name()) == false) {
+ throw new IllegalStateException("found existing value for " + MODE_KEY + " for segment: " + si.name + ", old=" + previous + ", new=" + mode.name());
+ }
+ return impl(mode).fieldsWriter(directory, si, context);
+ }
+
+ StoredFieldsFormat impl(Mode mode) {
+ switch (mode) {
+ case BEST_SPEED:
+ return new CompressingStoredFieldsFormat("Lucene87StoredFieldsFastData", CompressionMode.FAST, 16*1024, 128, 10);
+ case BEST_COMPRESSION:
+ return new CompressingStoredFieldsFormat("Lucene87StoredFieldsHighData", BEST_COMPRESSION_MODE, BEST_COMPRESSION_BLOCK_LENGTH, 512, 10);
+ default: throw new AssertionError();
+ }
+ }
+
+ // 8kB seems to be a good trade-off between higher compression rates by not
+ // having to fully bootstrap a dictionary, and indexing rate by not spending
+ // too much CPU initializing data-structures to find strings in this preset
+ // dictionary.
+ private static final int BEST_COMPRESSION_DICT_LENGTH = 8 * 1024;
+ // 48kB seems like a nice trade-off because it's small enough to keep
+ // retrieval fast, yet sub blocks can find strings in a window of 26kB of
+ // data on average (the window grows from 8kB to 32kB in the first 24kB, and
+ // then DEFLATE can use 32kB for the last 24kB) which is close enough to the
+ // maximum window length of DEFLATE of 32kB.
+ private static final int BEST_COMPRESSION_SUB_BLOCK_LENGTH = 48 * 1024;
+ // We shoot for 10 sub blocks per block, which should hopefully amortize the
+ // space overhead of having the first 8kB compressed without any preset dict,
+ // and then remove 8kB in order to avoid creating a tiny 11th sub block if
+ // documents are small.
+ private static final int BEST_COMPRESSION_BLOCK_LENGTH = BEST_COMPRESSION_DICT_LENGTH + 10 * BEST_COMPRESSION_SUB_BLOCK_LENGTH - 8 * 1024;
+
+ /** Compression mode for {@link Mode#BEST_COMPRESSION} */
+ public static final DeflateWithPresetDict BEST_COMPRESSION_MODE = new DeflateWithPresetDict(BEST_COMPRESSION_DICT_LENGTH, BEST_COMPRESSION_SUB_BLOCK_LENGTH);
+
+ /**
+ * A compression mode that trades speed for compression ratio. Although
+ * compression and decompression might be slow, this compression mode should
+ * provide a good compression ratio. This mode might be interesting if/when
+ * your index size is much bigger than your OS cache.
+ */
+ public static class DeflateWithPresetDict extends CompressionMode {
+
+ private final int dictLength, subBlockLength;
+
+ /** Sole constructor. */
+ public DeflateWithPresetDict(int dictLength, int subBlockLength) {
+ this.dictLength = dictLength;
+ this.subBlockLength = subBlockLength;
+ }
+
+ @Override
+ public Compressor newCompressor() {
+ // notes:
+ // 3 is the highest level that doesn't have lazy match evaluation
+ // 6 is the default, higher than that is just a waste of cpu
+ return new DeflateWithPresetDictCompressor(6, dictLength, subBlockLength);
+ }
+
+ @Override
+ public Decompressor newDecompressor() {
+ return new DeflateWithPresetDictDecompressor();
+ }
+
+ @Override
+ public String toString() {
+ return "BEST_COMPRESSION";
+ }
+
+ };
+
+ private static final class DeflateWithPresetDictDecompressor extends Decompressor {
+
+ byte[] compressed;
+
+ DeflateWithPresetDictDecompressor() {
+ compressed = new byte[0];
+ }
+
+ private void doDecompress(DataInput in, Inflater decompressor, BytesRef bytes) throws IOException {
+ final int compressedLength = in.readVInt();
+ if (compressedLength == 0) {
+ return;
+ }
+ // pad with extra "dummy byte": see javadocs for using Inflater(true)
+ // we do it for compliance, but it has been unnecessary for years in zlib.
+ final int paddedLength = compressedLength + 1;
+ compressed = ArrayUtil.grow(compressed, paddedLength);
+ in.readBytes(compressed, 0, compressedLength);
+ compressed[compressedLength] = 0; // explicitly set dummy byte to 0
+
+ // extra "dummy byte"
+ decompressor.setInput(compressed, 0, paddedLength);
+ try {
+ bytes.length += decompressor.inflate(bytes.bytes, bytes.length, bytes.bytes.length - bytes.length);
+ } catch (DataFormatException e) {
+ throw new IOException(e);
+ }
+ if (decompressor.finished() == false) {
+ throw new CorruptIndexException("Invalid decoder state: needsInput=" + decompressor.needsInput()
+ + ", needsDict=" + decompressor.needsDictionary(), in);
+ }
+ }
+
+ @Override
+ public void decompress(DataInput in, int originalLength, int offset, int length, BytesRef bytes) throws IOException {
+ assert offset + length <= originalLength;
+ if (length == 0) {
+ bytes.length = 0;
+ return;
+ }
+ final int dictLength = in.readVInt();
+ final int blockLength = in.readVInt();
+ bytes.bytes = ArrayUtil.grow(bytes.bytes, dictLength);
+ bytes.offset = bytes.length = 0;
+
+ final Inflater decompressor = new Inflater(true);
+ try {
+ // Read the dictionary
+ doDecompress(in, decompressor, bytes);
+ if (dictLength != bytes.length) {
+ throw new CorruptIndexException("Unexpected dict length", in);
+ }
+
+ int offsetInBlock = dictLength;
+ int offsetInBytesRef = offset;
+
+ // Skip unneeded blocks
+ while (offsetInBlock + blockLength < offset) {
+ final int compressedLength = in.readVInt();
+ in.skipBytes(compressedLength);
+ offsetInBlock += blockLength;
+ offsetInBytesRef -= blockLength;
+ }
+
+ // Read blocks that intersect with the interval we need
+ while (offsetInBlock < offset + length) {
+ bytes.bytes = ArrayUtil.grow(bytes.bytes, bytes.length + blockLength);
+ decompressor.reset();
+ decompressor.setDictionary(bytes.bytes, 0, dictLength);
+ doDecompress(in, decompressor, bytes);
+ offsetInBlock += blockLength;
+ }
+
+ bytes.offset = offsetInBytesRef;
+ bytes.length = length;
+ assert bytes.isValid();
+ } finally {
+ decompressor.end();
+ }
+ }
+
+ @Override
+ public Decompressor clone() {
+ return new DeflateWithPresetDictDecompressor();
+ }
+
+ }
+
+ private static class DeflateWithPresetDictCompressor extends Compressor {
+
+ final int dictLength;
+ final int blockLength;
+ final Deflater compressor;
+ byte[] compressed;
+ boolean closed;
+
+ DeflateWithPresetDictCompressor(int level, int dictLength, int blockLength) {
+ compressor = new Deflater(level, true);
+ compressed = new byte[64];
+ this.dictLength = dictLength;
+ this.blockLength = blockLength;
+ }
+
+ private void doCompress(byte[] bytes, int off, int len, DataOutput out) throws IOException {
+ if (len == 0) {
+ out.writeVInt(0);
+ return;
+ }
+ compressor.setInput(bytes, off, len);
+ compressor.finish();
+ if (compressor.needsInput()) {
+ throw new IllegalStateException();
+ }
+
+ int totalCount = 0;
+ for (;;) {
+ final int count = compressor.deflate(compressed, totalCount, compressed.length - totalCount);
+ totalCount += count;
+ assert totalCount <= compressed.length;
+ if (compressor.finished()) {
+ break;
+ } else {
+ compressed = ArrayUtil.grow(compressed);
+ }
+ }
+
+ out.writeVInt(totalCount);
+ out.writeBytes(compressed, totalCount);
+ }
+
+ @Override
+ public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException {
+ final int dictLength = Math.min(this.dictLength, len);
+ out.writeVInt(dictLength);
+ out.writeVInt(blockLength);
+ final int end = off + len;
+
+ // Compress the dictionary first
+ compressor.reset();
+ doCompress(bytes, off, dictLength, out);
+
+ // And then sub blocks
+ for (int start = off + dictLength; start < end; start += blockLength) {
+ compressor.reset();
+ compressor.setDictionary(bytes, off, dictLength);
+ doCompress(bytes, start, Math.min(blockLength, off + len - start), out);
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (closed == false) {
+ compressor.end();
+ closed = true;
+ }
+ }
+ }
+
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/package-info.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene87/package-info.java
similarity index 98%
copy from lucene/core/src/java/org/apache/lucene/codecs/lucene86/package-info.java
copy to lucene/core/src/java/org/apache/lucene/codecs/lucene87/package-info.java
index 19be7eb..75facdb 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/package-info.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene87/package-info.java
@@ -16,7 +16,7 @@
*/
/**
- * Lucene 8.6 file format.
+ * Lucene 8.7 file format.
*
* <h2>Apache Lucene - Index File Formats</h2>
* <div>
@@ -137,7 +137,7 @@
* This contains the set of field names used in the index.
* </li>
* <li>
- * {@link org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat Stored Field values}.
+ * {@link org.apache.lucene.codecs.lucene87.Lucene87StoredFieldsFormat Stored Field values}.
* This contains, for each document, a list of attribute-value pairs, where the attributes
* are field names. These are used to store auxiliary information about the document, such as
* its title, url, or an identifier to access a database. The set of stored fields are what is
@@ -250,12 +250,12 @@
* <td>Stores information about the fields</td>
* </tr>
* <tr>
- * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat Field Index}</td>
+ * <td>{@link org.apache.lucene.codecs.lucene87.Lucene87StoredFieldsFormat Field Index}</td>
* <td>.fdx</td>
* <td>Contains pointers to field data</td>
* </tr>
* <tr>
- * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat Field Data}</td>
+ * <td>{@link org.apache.lucene.codecs.lucene87.Lucene87StoredFieldsFormat Field Data}</td>
* <td>.fdt</td>
* <td>The stored fields for documents</td>
* </tr>
@@ -413,4 +413,4 @@
* better yet, {@link org.apache.lucene.store.DataOutput#writeVInt VInt} values which have no limit.</p>
* </div>
*/
-package org.apache.lucene.codecs.lucene86;
+package org.apache.lucene.codecs.lucene87;
diff --git a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
index 2897a8a..2be0f71 100644
--- a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
+++ b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
@@ -13,4 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-org.apache.lucene.codecs.lucene86.Lucene86Codec
+org.apache.lucene.codecs.lucene87.Lucene87Codec
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene80/TestLucene80NormsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene80/TestLucene80NormsFormat.java
index b6e7268..011d2ca 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene80/TestLucene80NormsFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene80/TestLucene80NormsFormat.java
@@ -18,14 +18,14 @@ package org.apache.lucene.codecs.lucene80;
import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.lucene86.Lucene86Codec;
import org.apache.lucene.index.BaseNormsFormatTestCase;
+import org.apache.lucene.util.TestUtil;
/**
* Tests Lucene80NormsFormat
*/
public class TestLucene80NormsFormat extends BaseNormsFormatTestCase {
- private final Codec codec = new Lucene86Codec();
+ private final Codec codec = TestUtil.getDefaultCodec();
@Override
protected Codec getCodec() {
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene86/TestLucene86PointsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene86/TestLucene86PointsFormat.java
index 8d5ce08..9198301 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene86/TestLucene86PointsFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene86/TestLucene86PointsFormat.java
@@ -49,7 +49,7 @@ public class TestLucene86PointsFormat extends BasePointsFormatTestCase {
public TestLucene86PointsFormat() {
// standard issue
- Codec defaultCodec = new Lucene86Codec();
+ Codec defaultCodec = TestUtil.getDefaultCodec();
if (random().nextBoolean()) {
// randomize parameters
maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500);
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene87/TestLucene87StoredFieldsFormat.java
similarity index 90%
rename from lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormat.java
rename to lucene/core/src/test/org/apache/lucene/codecs/lucene87/TestLucene87StoredFieldsFormat.java
index 4c7bed4..5604d41 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene87/TestLucene87StoredFieldsFormat.java
@@ -14,14 +14,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene50;
-
+package org.apache.lucene.codecs.lucene87;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BaseStoredFieldsFormatTestCase;
import org.apache.lucene.util.TestUtil;
-public class TestLucene50StoredFieldsFormat extends BaseStoredFieldsFormatTestCase {
+public class TestLucene87StoredFieldsFormat extends BaseStoredFieldsFormatTestCase {
@Override
protected Codec getCodec() {
return TestUtil.getDefaultCodec();
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene87/TestLucene87StoredFieldsFormatHighCompression.java
similarity index 85%
rename from lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java
rename to lucene/core/src/test/org/apache/lucene/codecs/lucene87/TestLucene87StoredFieldsFormatHighCompression.java
index cccee73..f4ebca6 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatHighCompression.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene87/TestLucene87StoredFieldsFormatHighCompression.java
@@ -14,13 +14,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene50;
+package org.apache.lucene.codecs.lucene87;
-import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
-import org.apache.lucene.codecs.lucene86.Lucene86Codec;
+import org.apache.lucene.codecs.lucene87.Lucene87StoredFieldsFormat.Mode;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.BaseStoredFieldsFormatTestCase;
@@ -29,10 +27,12 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
-public class TestLucene50StoredFieldsFormatHighCompression extends BaseStoredFieldsFormatTestCase {
+import com.carrotsearch.randomizedtesting.generators.RandomPicks;
+
+public class TestLucene87StoredFieldsFormatHighCompression extends BaseStoredFieldsFormatTestCase {
@Override
protected Codec getCodec() {
- return new Lucene86Codec(Mode.BEST_COMPRESSION);
+ return new Lucene87Codec(Mode.BEST_COMPRESSION);
}
/**
@@ -43,7 +43,7 @@ public class TestLucene50StoredFieldsFormatHighCompression extends BaseStoredFie
Directory dir = newDirectory();
for (int i = 0; i < 10; i++) {
IndexWriterConfig iwc = newIndexWriterConfig();
- iwc.setCodec(new Lucene86Codec(RandomPicks.randomFrom(random(), Mode.values())));
+ iwc.setCodec(new Lucene87Codec(RandomPicks.randomFrom(random(), Mode.values())));
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig());
Document doc = new Document();
doc.add(new StoredField("field1", "value1"));
@@ -70,11 +70,11 @@ public class TestLucene50StoredFieldsFormatHighCompression extends BaseStoredFie
public void testInvalidOptions() {
expectThrows(NullPointerException.class, () -> {
- new Lucene86Codec(null);
+ new Lucene87Codec(null);
});
expectThrows(NullPointerException.class, () -> {
- new Lucene50StoredFieldsFormat(null);
+ new Lucene87StoredFieldsFormat(null);
});
}
}
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatMergeInstance.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene87/TestLucene87StoredFieldsFormatMergeInstance.java
similarity index 87%
rename from lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatMergeInstance.java
rename to lucene/core/src/test/org/apache/lucene/codecs/lucene87/TestLucene87StoredFieldsFormatMergeInstance.java
index d0f3157..0015fb2 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50StoredFieldsFormatMergeInstance.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene87/TestLucene87StoredFieldsFormatMergeInstance.java
@@ -14,12 +14,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene50;
+package org.apache.lucene.codecs.lucene87;
/**
* Test the merge instance of the Lucene50 stored fields format.
*/
-public class TestLucene50StoredFieldsFormatMergeInstance extends TestLucene50StoredFieldsFormat {
+public class TestLucene87StoredFieldsFormatMergeInstance extends TestLucene87StoredFieldsFormat {
@Override
protected boolean shouldTestMergeInstance() {
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java b/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java
index d982953..d937c2f 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java
@@ -396,7 +396,7 @@ public class TestPointValues extends LuceneTestCase {
public void testDifferentCodecs1() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
- iwc.setCodec(Codec.forName("Lucene86"));
+ iwc.setCodec(TestUtil.getDefaultCodec());
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new IntPoint("int", 1));
@@ -427,7 +427,7 @@ public class TestPointValues extends LuceneTestCase {
w.close();
iwc = new IndexWriterConfig(new MockAnalyzer(random()));
- iwc.setCodec(Codec.forName("Lucene86"));
+ iwc.setCodec(TestUtil.getDefaultCodec());
w = new IndexWriter(dir, iwc);
doc = new Document();
doc.add(new IntPoint("int", 1));
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBoolean2.java b/lucene/core/src/test/org/apache/lucene/search/TestBoolean2.java
index 3400f0e..cac56e9 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestBoolean2.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestBoolean2.java
@@ -23,7 +23,6 @@ import java.util.Collections;
import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@@ -96,7 +95,7 @@ public class TestBoolean2 extends LuceneTestCase {
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
// randomized codecs are sometimes too costly for this test:
- iwc.setCodec(Codec.forName("Lucene86"));
+ iwc.setCodec(TestUtil.getDefaultCodec());
iwc.setMergePolicy(newLogMergePolicy());
RandomIndexWriter writer= new RandomIndexWriter(random(), directory, iwc);
// we'll make a ton of docs, disable store/norms/vectors
@@ -141,7 +140,7 @@ public class TestBoolean2 extends LuceneTestCase {
iwc = newIndexWriterConfig(new MockAnalyzer(random()));
// we need docID order to be preserved:
// randomized codecs are sometimes too costly for this test:
- iwc.setCodec(Codec.forName("Lucene86"));
+ iwc.setCodec(TestUtil.getDefaultCodec());
iwc.setMergePolicy(newLogMergePolicy());
try (IndexWriter w = new IndexWriter(singleSegmentDirectory, iwc)) {
w.forceMerge(1, true);
@@ -167,7 +166,7 @@ public class TestBoolean2 extends LuceneTestCase {
iwc = newIndexWriterConfig(new MockAnalyzer(random()));
// randomized codecs are sometimes too costly for this test:
- iwc.setCodec(Codec.forName("Lucene86"));
+ iwc.setCodec(TestUtil.getDefaultCodec());
RandomIndexWriter w = new RandomIndexWriter(random(), dir2, iwc);
w.addIndexes(copy);
copy.close();
@@ -179,7 +178,7 @@ public class TestBoolean2 extends LuceneTestCase {
iwc = newIndexWriterConfig(new MockAnalyzer(random()));
iwc.setMaxBufferedDocs(TestUtil.nextInt(random(), 50, 1000));
// randomized codecs are sometimes too costly for this test:
- iwc.setCodec(Codec.forName("Lucene86"));
+ iwc.setCodec(TestUtil.getDefaultCodec());
RandomIndexWriter w = new RandomIndexWriter(random(), dir2, iwc);
doc = new Document();
diff --git a/lucene/sandbox/src/test/org/apache/lucene/document/TestFloatPointNearestNeighbor.java b/lucene/sandbox/src/test/org/apache/lucene/document/TestFloatPointNearestNeighbor.java
index a14204c..f77d594 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/document/TestFloatPointNearestNeighbor.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/document/TestFloatPointNearestNeighbor.java
@@ -18,7 +18,6 @@ package org.apache.lucene.document;
import java.util.Arrays;
-import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
@@ -243,7 +242,7 @@ public class TestFloatPointNearestNeighbor extends LuceneTestCase {
private IndexWriterConfig getIndexWriterConfig() {
IndexWriterConfig iwc = newIndexWriterConfig();
- iwc.setCodec(Codec.forName("Lucene86"));
+ iwc.setCodec(TestUtil.getDefaultCodec());
return iwc;
}
}
diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/TestNearest.java b/lucene/sandbox/src/test/org/apache/lucene/search/TestNearest.java
index a149ace..98a3de1 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/search/TestNearest.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/search/TestNearest.java
@@ -19,7 +19,6 @@ package org.apache.lucene.search;
import java.util.Arrays;
import java.util.Comparator;
-import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LatLonDocValuesField;
@@ -246,7 +245,7 @@ public class TestNearest extends LuceneTestCase {
private IndexWriterConfig getIndexWriterConfig() {
IndexWriterConfig iwc = newIndexWriterConfig();
- iwc.setCodec(Codec.forName("Lucene86"));
+ iwc.setCodec(TestUtil.getDefaultCodec());
return iwc;
}
}
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
index 12c8902..f4a7c99 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
@@ -39,7 +39,7 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.lucene86.Lucene86Codec;
+import org.apache.lucene.codecs.lucene87.Lucene87Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntPoint;
@@ -887,7 +887,7 @@ public class TestSuggestField extends LuceneTestCase {
static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, final Set<String> suggestFields) {
IndexWriterConfig iwc = newIndexWriterConfig(random(), analyzer);
iwc.setMergePolicy(newLogMergePolicy());
- Codec filterCodec = new Lucene86Codec() {
+ Codec filterCodec = new Lucene87Codec() {
CompletionPostingsFormat.FSTLoadMode fstLoadMode =
RandomPicks.randomFrom(random(), CompletionPostingsFormat.FSTLoadMode.values());
PostingsFormat postingsFormat = new Completion84PostingsFormat(fstLoadMode);
diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java
index 4f334ae..9fd243f 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java
@@ -37,7 +37,7 @@ public abstract class CompressingCodec extends FilterCodec {
* Create a random instance.
*/
public static CompressingCodec randomInstance(Random random, int chunkSize, int maxDocsPerChunk, boolean withSegmentSuffix, int blockShift) {
- switch (random.nextInt(4)) {
+ switch (random.nextInt(5)) {
case 0:
return new FastCompressingCodec(chunkSize, maxDocsPerChunk, withSegmentSuffix, blockShift);
case 1:
@@ -46,6 +46,8 @@ public abstract class CompressingCodec extends FilterCodec {
return new HighCompressionCompressingCodec(chunkSize, maxDocsPerChunk, withSegmentSuffix, blockShift);
case 3:
return new DummyCompressingCodec(chunkSize, maxDocsPerChunk, withSegmentSuffix, blockShift);
+ case 4:
+ return new DeflateWithPresetCompressingCodec(chunkSize, maxDocsPerChunk, withSegmentSuffix, blockShift);
default:
throw new AssertionError();
}
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene84/Lucene84RWCodec.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/DeflateWithPresetCompressingCodec.java
similarity index 50%
copy from lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene84/Lucene84RWCodec.java
copy to lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/DeflateWithPresetCompressingCodec.java
index c1fd467..9d1791e 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene84/Lucene84RWCodec.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/DeflateWithPresetCompressingCodec.java
@@ -14,26 +14,23 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene84;
+package org.apache.lucene.codecs.compressing;
-import org.apache.lucene.codecs.PointsFormat;
-import org.apache.lucene.codecs.SegmentInfoFormat;
-import org.apache.lucene.codecs.lucene60.Lucene60RWPointsFormat;
-import org.apache.lucene.codecs.lucene70.Lucene70RWSegmentInfoFormat;
+import org.apache.lucene.codecs.lucene87.Lucene87StoredFieldsFormat.DeflateWithPresetDict;
-/**
- * RW impersonation of {@link Lucene84Codec}.
- */
-public class Lucene84RWCodec extends Lucene84Codec {
+/** {@link CompressingCodec} that uses {@link DeflateWithPresetDict}. */
+public class DeflateWithPresetCompressingCodec extends CompressingCodec {
- @Override
- public PointsFormat pointsFormat() {
- return new Lucene60RWPointsFormat();
+ /** Constructor that allows configuring the chunk size, the max docs per chunk, the segment suffix and the block size. */
+ public DeflateWithPresetCompressingCodec(int chunkSize, int maxDocsPerChunk, boolean withSegmentSuffix, int blockSize) {
+ super("DeflateWithPresetCompressingStoredFieldsData",
+ withSegmentSuffix ? "DeflateWithPresetCompressingStoredFields" : "",
+ new DeflateWithPresetDict(chunkSize/10, chunkSize/3+1), chunkSize, maxDocsPerChunk, blockSize);
}
- @Override
- public SegmentInfoFormat segmentInfoFormat() {
- return new Lucene70RWSegmentInfoFormat();
+ /** No-arg constructor. */
+ public DeflateWithPresetCompressingCodec() {
+ this(1<<18, 512, false, 10);
}
}
diff --git a/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java
index f556c0d..c080db1 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/geo/BaseGeoPointTestCase.java
@@ -26,6 +26,7 @@ import java.util.Locale;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsReader;
@@ -1276,7 +1277,8 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
// Else seeds may not reproduce:
iwc.setMergeScheduler(new SerialMergeScheduler());
int pointsInLeaf = 2 + random().nextInt(4);
- iwc.setCodec(new FilterCodec("Lucene86", TestUtil.getDefaultCodec()) {
+ final Codec in = TestUtil.getDefaultCodec();
+ iwc.setCodec(new FilterCodec(in.getName(), in) {
@Override
public PointsFormat pointsFormat() {
return new PointsFormat() {
diff --git a/lucene/test-framework/src/java/org/apache/lucene/geo/BaseXYPointTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/geo/BaseXYPointTestCase.java
index f60bd4c..c9240d7 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/geo/BaseXYPointTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/geo/BaseXYPointTestCase.java
@@ -26,6 +26,7 @@ import java.util.Locale;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsReader;
@@ -1190,7 +1191,8 @@ public abstract class BaseXYPointTestCase extends LuceneTestCase {
// Else seeds may not reproduce:
iwc.setMergeScheduler(new SerialMergeScheduler());
int pointsInLeaf = 2 + random().nextInt(4);
- iwc.setCodec(new FilterCodec("Lucene86", TestUtil.getDefaultCodec()) {
+ Codec in = TestUtil.getDefaultCodec();
+ iwc.setCodec(new FilterCodec(in.getName(), in) {
@Override
public PointsFormat pointsFormat() {
return new PointsFormat() {
diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
index aef11ac..81cb328 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
@@ -33,8 +33,8 @@ import org.apache.lucene.codecs.asserting.AssertingDocValuesFormat;
import org.apache.lucene.codecs.asserting.AssertingPostingsFormat;
import org.apache.lucene.codecs.cheapbastard.CheapBastardCodec;
import org.apache.lucene.codecs.compressing.CompressingCodec;
-import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat;
-import org.apache.lucene.codecs.lucene86.Lucene86Codec;
+import org.apache.lucene.codecs.lucene87.Lucene87StoredFieldsFormat;
+import org.apache.lucene.codecs.lucene87.Lucene87Codec;
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
import org.apache.lucene.index.RandomCodec;
@@ -187,8 +187,8 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule {
codec = new AssertingCodec();
} else if ("Compressing".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 6 && !shouldAvoidCodec("Compressing"))) {
codec = CompressingCodec.randomInstance(random);
- } else if ("Lucene84".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 5 && !shouldAvoidCodec("Lucene84"))) {
- codec = new Lucene86Codec(RandomPicks.randomFrom(random, Lucene50StoredFieldsFormat.Mode.values())
+ } else if ("Lucene87".equals(TEST_CODEC) || ("random".equals(TEST_CODEC) && randomVal == 5 && !shouldAvoidCodec("Lucene87"))) {
+ codec = new Lucene87Codec(RandomPicks.randomFrom(random, Lucene87StoredFieldsFormat.Mode.values())
);
} else if (!"random".equals(TEST_CODEC)) {
codec = Codec.forName(TEST_CODEC);
diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
index 2dc9ead..7104a85 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
@@ -54,7 +54,7 @@ import org.apache.lucene.codecs.blockterms.LuceneFixedGap;
import org.apache.lucene.codecs.blocktreeords.BlockTreeOrdsPostingsFormat;
import org.apache.lucene.codecs.lucene80.Lucene80DocValuesFormat;
import org.apache.lucene.codecs.lucene84.Lucene84PostingsFormat;
-import org.apache.lucene.codecs.lucene86.Lucene86Codec;
+import org.apache.lucene.codecs.lucene87.Lucene87Codec;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.apache.lucene.document.BinaryDocValuesField;
@@ -919,7 +919,7 @@ public final class TestUtil {
* This may be different than {@link Codec#getDefault()} because that is randomized.
*/
public static Codec getDefaultCodec() {
- return new Lucene86Codec();
+ return new Lucene87Codec();
}
/**
diff --git a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
index 282f5dd..5892cb0 100644
--- a/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
+++ b/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
@@ -15,6 +15,7 @@
org.apache.lucene.codecs.asserting.AssertingCodec
org.apache.lucene.codecs.cheapbastard.CheapBastardCodec
+org.apache.lucene.codecs.compressing.DeflateWithPresetCompressingCodec
org.apache.lucene.codecs.compressing.FastCompressingCodec
org.apache.lucene.codecs.compressing.FastDecompressionCompressingCodec
org.apache.lucene.codecs.compressing.HighCompressionCompressingCodec
diff --git a/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java b/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java
index 6fc3629..edad01e 100644
--- a/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java
+++ b/solr/core/src/java/org/apache/solr/core/SchemaCodecFactory.java
@@ -23,8 +23,8 @@ import java.util.Locale;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
-import org.apache.lucene.codecs.lucene86.Lucene86Codec;
+import org.apache.lucene.codecs.lucene87.Lucene87StoredFieldsFormat.Mode;
+import org.apache.lucene.codecs.lucene87.Lucene87Codec;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.util.NamedList;
@@ -92,7 +92,7 @@ public class SchemaCodecFactory extends CodecFactory implements SolrCoreAware {
compressionMode = SOLR_DEFAULT_COMPRESSION_MODE;
log.debug("Using default compressionMode: {}", compressionMode);
}
- codec = new Lucene86Codec(compressionMode) {
+ codec = new Lucene87Codec(compressionMode) {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
final SchemaField schemaField = core.getLatestSchema().getFieldOrNull(field);