You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ju...@apache.org on 2021/02/04 18:43:34 UTC

[lucene-solr] branch master updated: LUCENE-9705: Create Lucene90LiveDocsFormat (#2274)

This is an automated email from the ASF dual-hosted git repository.

julietibs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new f0a2f1f  LUCENE-9705: Create Lucene90LiveDocsFormat (#2274)
f0a2f1f is described below

commit f0a2f1fe0398a96c6d12959de7069c588395b8af
Author: Julie Tibshirani <ju...@elastic.co>
AuthorDate: Thu Feb 4 10:43:16 2021 -0800

    LUCENE-9705: Create Lucene90LiveDocsFormat (#2274)
    
    For now this is just a copy of Lucene50LiveDocsFormat. The existing
    Lucene50LiveDocsFormat was moved to the backward-codecs module.
---
 .../lucene50/Lucene50LiveDocsFormat.java           |  7 ++++++-
 .../backward_codecs/lucene70/Lucene70Codec.java    |  2 +-
 .../backward_codecs/lucene80/Lucene80Codec.java    |  2 +-
 .../backward_codecs/lucene84/Lucene84Codec.java    |  2 +-
 .../backward_codecs/lucene86/Lucene86Codec.java    |  2 +-
 .../backward_codecs/lucene87/Lucene87Codec.java    |  2 +-
 .../lucene50/TestLucene50LiveDocsFormat.java       |  6 +++---
 .../backward_index/TestBackwardsCompatibility.java | 23 ++++++++++++++++++++++
 .../lucene/codecs/lucene90/Lucene90Codec.java      |  3 +--
 .../Lucene90LiveDocsFormat.java}                   | 10 +++++-----
 .../lucene/codecs/lucene90/package-info.java       |  4 ++--
 ...Format.java => TestLucene90LiveDocsFormat.java} |  2 +-
 12 files changed, 46 insertions(+), 19 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50LiveDocsFormat.java
similarity index 94%
copy from lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java
copy to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50LiveDocsFormat.java
index 0a0c476..ebe76ae 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/Lucene50LiveDocsFormat.java
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.lucene.codecs.lucene50;
+package org.apache.lucene.backward_codecs.lucene50;
 
 import java.io.IOException;
 import java.util.Collection;
@@ -107,6 +107,11 @@ public final class Lucene50LiveDocsFormat extends LiveDocsFormat {
     return new FixedBitSet(data, length);
   }
 
+  /**
+   * Note: although this format is only used on older versions, we need to keep the write logic in
+   * addition to the read logic. When we delete documents that live in an older segment, we write to
+   * the live docs for that segment.
+   */
   @Override
   public void writeLiveDocs(
       Bits bits, Directory dir, SegmentCommitInfo info, int newDelCount, IOContext context)
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java
index c259fb5..e34502e 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70Codec.java
@@ -16,6 +16,7 @@
  */
 package org.apache.lucene.backward_codecs.lucene70;
 
+import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat;
 import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat;
 import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
 import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat;
@@ -34,7 +35,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat;
 import org.apache.lucene.codecs.TermVectorsFormat;
 import org.apache.lucene.codecs.VectorFormat;
 import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
 import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
 import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
 import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java
index 6660461..f39ffa7 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80Codec.java
@@ -16,6 +16,7 @@
  */
 package org.apache.lucene.backward_codecs.lucene80;
 
+import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat;
 import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat;
 import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat;
 import org.apache.lucene.backward_codecs.lucene60.Lucene60PointsFormat;
@@ -33,7 +34,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat;
 import org.apache.lucene.codecs.TermVectorsFormat;
 import org.apache.lucene.codecs.VectorFormat;
 import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
 import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
 import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat;
 import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java
index 49383e3..0b3ffb7 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene84/Lucene84Codec.java
@@ -17,6 +17,7 @@
 package org.apache.lucene.backward_codecs.lucene84;
 
 import java.util.Objects;
+import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat;
 import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat;
 import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat.Mode;
 import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat;
@@ -36,7 +37,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat;
 import org.apache.lucene.codecs.TermVectorsFormat;
 import org.apache.lucene.codecs.VectorFormat;
 import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
 import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
 import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat;
 import org.apache.lucene.codecs.lucene84.Lucene84PostingsFormat;
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java
index 7d51c67..db02573 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene86/Lucene86Codec.java
@@ -18,6 +18,7 @@
 package org.apache.lucene.backward_codecs.lucene86;
 
 import java.util.Objects;
+import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat;
 import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat;
 import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat;
 import org.apache.lucene.codecs.Codec;
@@ -34,7 +35,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat;
 import org.apache.lucene.codecs.TermVectorsFormat;
 import org.apache.lucene.codecs.VectorFormat;
 import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
 import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
 import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat;
 import org.apache.lucene.codecs.lucene84.Lucene84PostingsFormat;
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java
index b254fa6..8543de6 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/Lucene87Codec.java
@@ -18,6 +18,7 @@
 package org.apache.lucene.backward_codecs.lucene87;
 
 import java.util.Objects;
+import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat;
 import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.CompoundFormat;
@@ -33,7 +34,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat;
 import org.apache.lucene.codecs.TermVectorsFormat;
 import org.apache.lucene.codecs.VectorFormat;
 import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
 import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
 import org.apache.lucene.codecs.lucene80.Lucene80DocValuesFormat;
 import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat;
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50LiveDocsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestLucene50LiveDocsFormat.java
similarity index 87%
copy from lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50LiveDocsFormat.java
copy to lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestLucene50LiveDocsFormat.java
index ebf8394..5cd1bc0 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50LiveDocsFormat.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene50/TestLucene50LiveDocsFormat.java
@@ -14,16 +14,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.lucene.codecs.lucene50;
+package org.apache.lucene.backward_codecs.lucene50;
 
+import org.apache.lucene.backward_codecs.lucene86.Lucene86RWCodec;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.index.BaseLiveDocsFormatTestCase;
-import org.apache.lucene.util.TestUtil;
 
 public class TestLucene50LiveDocsFormat extends BaseLiveDocsFormatTestCase {
 
   @Override
   protected Codec getCodec() {
-    return TestUtil.getDefaultCodec();
+    return new Lucene86RWCodec();
   }
 }
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java
index d89b648..7c75d5a 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java
@@ -1835,6 +1835,29 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
     dir.close();
   }
 
+  public void testDeletes() throws Exception { // deletes one doc from an old-format index
+    Path oldIndexDir = createTempDir("dvupdates");
+    TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir); // unpack the old index
+    Directory dir = newFSDirectory(oldIndexDir);
+    verifyUsesDefaultCodec(dir, dvUpdatesIndex);
+
+    IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random()));
+    IndexWriter writer = new IndexWriter(dir, conf);
+
+    int maxDoc = writer.getDocStats().maxDoc; // baseline, taken before the delete
+    writer.deleteDocuments(new Term("id", "1"));
+    if (random().nextBoolean()) { // randomly commit the delete before merging
+      writer.commit();
+    }
+
+    writer.forceMerge(1); // presumably rewrites the segments without the deleted doc
+    writer.commit();
+    assertEquals(maxDoc - 1, writer.getDocStats().maxDoc); // exactly one doc fewer than baseline
+
+    writer.close(); // NOTE(review): not reached if the assertion above throws
+    dir.close();
+  }
+
   public void testSoftDeletes() throws Exception {
     Path oldIndexDir = createTempDir("dvupdates");
     TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir);
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java
index be3a9e7..6250592 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90Codec.java
@@ -31,7 +31,6 @@ import org.apache.lucene.codecs.StoredFieldsFormat;
 import org.apache.lucene.codecs.TermVectorsFormat;
 import org.apache.lucene.codecs.VectorFormat;
 import org.apache.lucene.codecs.lucene50.Lucene50CompoundFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat;
 import org.apache.lucene.codecs.lucene50.Lucene50TermVectorsFormat;
 import org.apache.lucene.codecs.lucene80.Lucene80DocValuesFormat;
 import org.apache.lucene.codecs.lucene80.Lucene80NormsFormat;
@@ -73,7 +72,7 @@ public class Lucene90Codec extends Codec {
   private final TermVectorsFormat vectorsFormat = new Lucene50TermVectorsFormat();
   private final FieldInfosFormat fieldInfosFormat = new Lucene90FieldInfosFormat();
   private final SegmentInfoFormat segmentInfosFormat = new Lucene86SegmentInfoFormat();
-  private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat();
+  private final LiveDocsFormat liveDocsFormat = new Lucene90LiveDocsFormat();
   private final CompoundFormat compoundFormat = new Lucene50CompoundFormat();
   private final PostingsFormat defaultFormat;
 
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90LiveDocsFormat.java
similarity index 95%
rename from lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java
rename to lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90LiveDocsFormat.java
index 0a0c476..e5496a9 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50LiveDocsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90LiveDocsFormat.java
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.lucene.codecs.lucene50;
+package org.apache.lucene.codecs.lucene90;
 
 import java.io.IOException;
 import java.util.Collection;
@@ -33,7 +33,7 @@ import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.FixedBitSet;
 
 /**
- * Lucene 5.0 live docs format
+ * Lucene 9.0 live docs format
  *
  * <p>The .liv file is optional, and only exists when a segment contains deletions.
  *
@@ -46,13 +46,13 @@ import org.apache.lucene.util.FixedBitSet;
  *   <li>Bits --&gt; &lt;{@link DataOutput#writeLong Int64}&gt; <sup>LongCount</sup>
  * </ul>
  */
-public final class Lucene50LiveDocsFormat extends LiveDocsFormat {
+public final class Lucene90LiveDocsFormat extends LiveDocsFormat {
 
   /** extension of live docs */
   private static final String EXTENSION = "liv";
 
   /** codec of live docs */
-  private static final String CODEC_NAME = "Lucene50LiveDocs";
+  private static final String CODEC_NAME = "Lucene90LiveDocs";
 
   /** supported version range */
   private static final int VERSION_START = 0;
@@ -60,7 +60,7 @@ public final class Lucene50LiveDocsFormat extends LiveDocsFormat {
   private static final int VERSION_CURRENT = VERSION_START;
 
   /** Sole constructor. */
-  public Lucene50LiveDocsFormat() {}
+  public Lucene90LiveDocsFormat() {}
 
   @Override
   public Bits readLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context)
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/package-info.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/package-info.java
index 6bc4f5d..b7a9d4a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/package-info.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/package-info.java
@@ -174,7 +174,7 @@
  *       loaded into main memory for fast access. Whereas stored values are generally intended for
  *       summary results from searches, per-document values are useful for things like scoring
  *       factors.
- *   <li>{@link org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat Live documents}. An
+ *   <li>{@link org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat Live documents}. An
  *       optional file indicating which documents are live.
  *   <li>{@link org.apache.lucene.codecs.lucene86.Lucene86PointsFormat Point values}. Optional pair
  *       of files, recording dimensionally indexed fields, to enable fast numeric range filtering
@@ -300,7 +300,7 @@
  * <td>Contains term vector data.</td>
  * </tr>
  * <tr>
- * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat Live Documents}</td>
+ * <td>{@link org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat Live Documents}</td>
  * <td>.liv</td>
  * <td>Info about what documents are live</td>
  * </tr>
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50LiveDocsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene90LiveDocsFormat.java
similarity index 94%
rename from lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50LiveDocsFormat.java
rename to lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene90LiveDocsFormat.java
index ebf8394..b4c734b 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50LiveDocsFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene90LiveDocsFormat.java
@@ -20,7 +20,7 @@ import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.index.BaseLiveDocsFormatTestCase;
 import org.apache.lucene.util.TestUtil;
 
-public class TestLucene50LiveDocsFormat extends BaseLiveDocsFormatTestCase {
+public class TestLucene90LiveDocsFormat extends BaseLiveDocsFormatTestCase {
 
   @Override
   protected Codec getCodec() {