You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ju...@apache.org on 2021/02/04 18:43:34 UTC
[lucene-solr] branch master updated: LUCENE-9705: Create Lucene90LiveDocsFormat (#2274)
This is an automated email from the ASF dual-hosted git repository.
julietibs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new f0a2f1f LUCENE-9705: Create Lucene90LiveDocsFormat (#2274)
f0a2f1f is described below
commit f0a2f1fe0398a96c6d12959de7069c588395b8af
Author: Julie Tibshirani <ju...@elastic.co>
AuthorDate: Thu Feb 4 10:43:16 2021 -0800
LUCENE-9705: Create Lucene90LiveDocsFormat (#2274)
For now this is just a copy of Lucene50LiveDocsFormat. The existing
Lucene50LiveDocsFormat was moved to the backward-codecs module.
---
.../lucene50/Lucene50LiveDocsFormat.java | 7 ++++++-
.../backward_codecs/lucene70/Lucene70Codec.java | 2 +-
.../backward_codecs/lucene80/Lucene80Codec.java | 2 +-
.../backward_codecs/lucene84/Lucene84Codec.java | 2 +-
.../backward_codecs/lucene86/Lucene86Codec.java | 2 +-
.../backward_codecs/lucene87/Lucene87Codec.java | 2 +-
.../lucene50/TestLucene50LiveDocsFormat.java | 6 +++---
.../backward_index/TestBackwardsCompatibility.java | 23 ++++++++++++++++++++++
.../lucene/codecs/lucene90/Lucene90Codec.java | 3 +--
.../Lucene90LiveDocsFormat.java} | 10 +++++-----
.../lucene/codecs/lucene90/package-info.java | 4 ++--
...Format.java => TestLucene90LiveDocsFormat.java} | 2 +-
12 files changed, 46 insertions(+), 19 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50LiveDocsFormat.java
similarity index 94%
copy from lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java
copy to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50LiveDocsFormat.java
index 0a0c476..ebe76ae 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50LiveDocsFormat.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene50;
+package org.apache.lucene.backward_codecs.lucene50;
import java.io.IOException;
import java.util.Collection;
@@ -107,6 +107,11 @@ public final class Lucene50LiveDocsFormat extends LiveDocsFormat {
return new FixedBitSet(data, length);
}
+ /**
+ * Note: although this format is only used on older versions, we need to keep the write logic in
+ * addition to the read logic. When we delete documents that live in an older segment, we write to
+ * the live docs for that segment.
+ */
@Override
public void writeLiveDocs(
Bits bits, Directory dir, SegmentCommitInfo info, int newDelCount, IOContext context)
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java
index c259fb5..e34502e 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java
@@ -16,6 +16,7 @@
*/
package org.apache.lucene.backward_codecs.lucene70;
+import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat;
@@ -34,7 +35,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java
index 6660461..f39ffa7 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java
@@ -16,6 +16,7 @@
*/
package org.apache.lucene.backward_codecs.lucene80;
+import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat;
import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat;
import org.apache.lucene.backward_codecs.lucene60.Lucene60PointsFormat;
@@ -33,7 +34,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java
index 49383e3..0b3ffb7 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java
@@ -17,6 +17,7 @@
package org.apache.lucene.backward_codecs.lucene84;
import java.util.Objects;
+import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat;
@@ -36,7 +37,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat;
import org.apache.lucene.codecs.lucene84.Lucene84PostingsFormat;
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java
index 7d51c67..db02573 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java
@@ -18,6 +18,7 @@
package org.apache.lucene.backward_codecs.lucene86;
import java.util.Objects;
+import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat;
import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat;
import org.apache.lucene.codecs.Codec;
@@ -34,7 +35,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat;
import org.apache.lucene.codecs.lucene84.Lucene84PostingsFormat;
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java
index b254fa6..8543de6 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java
@@ -18,6 +18,7 @@
package org.apache.lucene.backward_codecs.lucene87;
import java.util.Objects;
+import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat;
import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompoundFormat;
@@ -33,7 +34,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
import org.apache.lucene.codecs.lucene80.Lucene80DocValuesFormat;
import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat;
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50LiveDocsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestLucene50LiveDocsFormat.java
similarity index 87%
copy from lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50LiveDocsFormat.java
copy to lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestLucene50LiveDocsFormat.java
index ebf8394..5cd1bc0 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50LiveDocsFormat.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestLucene50LiveDocsFormat.java
@@ -14,16 +14,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene50;
+package org.apache.lucene.backward_codecs.lucene50;
+import org.apache.lucene.backward_codecs.lucene86.Lucene86RWCodec;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BaseLiveDocsFormatTestCase;
-import org.apache.lucene.util.TestUtil;
public class TestLucene50LiveDocsFormat extends BaseLiveDocsFormatTestCase {
@Override
protected Codec getCodec() {
- return TestUtil.getDefaultCodec();
+ return new Lucene86RWCodec();
}
}
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java
index d89b648..7c75d5a 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java
@@ -1835,6 +1835,29 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
dir.close();
}
+ public void testDeletes() throws Exception {
+ Path oldIndexDir = createTempDir("dvupdates");
+ TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir);
+ Directory dir = newFSDirectory(oldIndexDir);
+ verifyUsesDefaultCodec(dir, dvUpdatesIndex);
+
+ IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random()));
+ IndexWriter writer = new IndexWriter(dir, conf);
+
+ int maxDoc = writer.getDocStats().maxDoc;
+ writer.deleteDocuments(new Term("id", "1"));
+ if (random().nextBoolean()) {
+ writer.commit();
+ }
+
+ writer.forceMerge(1);
+ writer.commit();
+ assertEquals(maxDoc - 1, writer.getDocStats().maxDoc);
+
+ writer.close();
+ dir.close();
+ }
+
public void testSoftDeletes() throws Exception {
Path oldIndexDir = createTempDir("dvupdates");
TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir);
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java
index be3a9e7..6250592 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java
@@ -31,7 +31,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.VectorFormat;
import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
import org.apache.lucene.codecs.lucene80.Lucene80DocValuesFormat;
import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat;
@@ -73,7 +72,7 @@ public class Lucene90Codec extends Codec {
private final TermVectorsFormat vectorsFormat = new Lucene50TermVectorsFormat();
private final FieldInfosFormat fieldInfosFormat = new Lucene90FieldInfosFormat();
private final SegmentInfoFormat segmentInfosFormat = new Lucene86SegmentInfoFormat();
- private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat();
+ private final LiveDocsFormat liveDocsFormat = new Lucene90LiveDocsFormat();
private final CompoundFormat compoundFormat = new Lucene50CompoundFormat();
private final PostingsFormat defaultFormat;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90LiveDocsFormat.java
similarity index 95%
rename from lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java
rename to lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90LiveDocsFormat.java
index 0a0c476..e5496a9 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90LiveDocsFormat.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene50;
+package org.apache.lucene.codecs.lucene90;
import java.io.IOException;
import java.util.Collection;
@@ -33,7 +33,7 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
/**
- * Lucene 5.0 live docs format
+ * Lucene 9.0 live docs format
*
* <p>The .liv file is optional, and only exists when a segment contains deletions.
*
@@ -46,13 +46,13 @@ import org.apache.lucene.util.FixedBitSet;
* <li>Bits --> <{@link DataOutput#writeLong Int64}> <sup>LongCount</sup>
* </ul>
*/
-public final class Lucene50LiveDocsFormat extends LiveDocsFormat {
+public final class Lucene90LiveDocsFormat extends LiveDocsFormat {
/** extension of live docs */
private static final String EXTENSION = "liv";
/** codec of live docs */
- private static final String CODEC_NAME = "Lucene50LiveDocs";
+ private static final String CODEC_NAME = "Lucene90LiveDocs";
/** supported version range */
private static final int VERSION_START = 0;
@@ -60,7 +60,7 @@ public final class Lucene50LiveDocsFormat extends LiveDocsFormat {
private static final int VERSION_CURRENT = VERSION_START;
/** Sole constructor. */
- public Lucene50LiveDocsFormat() {}
+ public Lucene90LiveDocsFormat() {}
@Override
public Bits readLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context)
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/package-info.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/package-info.java
index 6bc4f5d..b7a9d4a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/package-info.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/package-info.java
@@ -174,7 +174,7 @@
* loaded into main memory for fast access. Whereas stored values are generally intended for
* summary results from searches, per-document values are useful for things like scoring
* factors.
- * <li>{@link org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat Live documents}. An
+ * <li>{@link org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat Live documents}. An
* optional file indicating which documents are live.
* <li>{@link org.apache.lucene.codecs.lucene86.Lucene86PointsFormat Point values}. Optional pair
* of files, recording dimensionally indexed fields, to enable fast numeric range filtering
@@ -300,7 +300,7 @@
* <td>Contains term vector data.</td>
* </tr>
* <tr>
- * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat Live Documents}</td>
+ * <td>{@link org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat Live Documents}</td>
* <td>.liv</td>
* <td>Info about what documents are live</td>
* </tr>
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50LiveDocsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene90LiveDocsFormat.java
similarity index 94%
rename from lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50LiveDocsFormat.java
rename to lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene90LiveDocsFormat.java
index ebf8394..b4c734b 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50LiveDocsFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene90LiveDocsFormat.java
@@ -20,7 +20,7 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BaseLiveDocsFormatTestCase;
import org.apache.lucene.util.TestUtil;
-public class TestLucene50LiveDocsFormat extends BaseLiveDocsFormatTestCase {
+public class TestLucene90LiveDocsFormat extends BaseLiveDocsFormatTestCase {
@Override
protected Codec getCodec() {