You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dn...@apache.org on 2022/04/28 18:19:30 UTC

[lucene] branch branch_9_1 updated: LUCENE-10518: Relax field consistency check for old indices (#842)

This is an automated email from the ASF dual-hosted git repository.

dnhatn pushed a commit to branch branch_9_1
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9_1 by this push:
     new 62ebb22cc07 LUCENE-10518: Relax field consistency check for old indices (#842)
62ebb22cc07 is described below

commit 62ebb22cc07510ac477d6bc76d17e65710395bd6
Author: Nhat Nguyen <nh...@elastic.co>
AuthorDate: Thu Apr 28 14:19:24 2022 -0400

    LUCENE-10518: Relax field consistency check for old indices (#842)
    
    This change relaxes the field consistency check for old indices as we
    didn't enforce that in the previous versions. This commit also disables
    the optimization that relies on the field consistency for old indices.
---
 lucene/CHANGES.txt                                 |  2 +-
 .../java/org/apache/lucene/index/FieldInfo.java    | 50 +++++++++++++-----
 .../java/org/apache/lucene/index/FieldInfos.java   | 60 ++++++++++++++++------
 .../java/org/apache/lucene/index/IndexWriter.java  |  6 ++-
 .../apache/lucene/index/ParallelLeafReader.java    | 10 +++-
 .../test/org/apache/lucene/index/TestCodecs.java   |  6 ++-
 .../src/test/org/apache/lucene/index/TestDoc.java  |  2 +-
 .../org/apache/lucene/index/TestFieldInfos.java    | 56 +++++++++++++++++++-
 .../org/apache/lucene/index/TestFieldsReader.java  |  5 +-
 .../org/apache/lucene/index/TestSegmentMerger.java |  2 +-
 10 files changed, 161 insertions(+), 38 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 9c714524a17..833326c8040 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -7,7 +7,7 @@ http://s.apache.org/luceneversions
 
 Bug Fixes
 ---------------------
-(No changes)
+* LUCENE-10518: Relax field consistency check for old indices (Nhat Nguyen)
 
 ======================= Lucene 9.1.0 =======================
 
diff --git a/lucene/core/src/java/org/apache/lucene/index/FieldInfo.java b/lucene/core/src/java/org/apache/lucene/index/FieldInfo.java
index 71d9e395ed2..d18d8747178 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FieldInfo.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FieldInfo.java
@@ -210,14 +210,15 @@ public final class FieldInfo {
    * @param o – other FieldInfo whose schema is verified against this FieldInfo's schema
    * @throws IllegalArgumentException if the field schemas are not the same
    */
-  void verifySameSchema(FieldInfo o) {
+  void verifySameSchema(FieldInfo o, boolean strictlyConsistent) {
     String fieldName = this.name;
-    verifySameIndexOptions(fieldName, this.indexOptions, o.getIndexOptions());
+    verifySameIndexOptions(fieldName, this.indexOptions, o.getIndexOptions(), strictlyConsistent);
     if (this.indexOptions != IndexOptions.NONE) {
-      verifySameOmitNorms(fieldName, this.omitNorms, o.omitNorms);
-      verifySameStoreTermVectors(fieldName, this.storeTermVector, o.storeTermVector);
+      verifySameOmitNorms(fieldName, this.omitNorms, o.omitNorms, strictlyConsistent);
+      verifySameStoreTermVectors(
+          fieldName, this.storeTermVector, o.storeTermVector, strictlyConsistent);
     }
-    verifySameDocValuesType(fieldName, this.docValuesType, o.docValuesType);
+    verifySameDocValuesType(fieldName, this.docValuesType, o.docValuesType, strictlyConsistent);
     verifySamePointsOptions(
         fieldName,
         this.pointDimensionCount,
@@ -225,7 +226,8 @@ public final class FieldInfo {
         this.pointNumBytes,
         o.pointDimensionCount,
         o.pointIndexDimensionCount,
-        o.pointNumBytes);
+        o.pointNumBytes,
+        strictlyConsistent);
     verifySameVectorOptions(
         fieldName,
         this.vectorDimension,
@@ -240,7 +242,14 @@ public final class FieldInfo {
    * @throws IllegalArgumentException if they are not the same
    */
   static void verifySameIndexOptions(
-      String fieldName, IndexOptions indexOptions1, IndexOptions indexOptions2) {
+      String fieldName,
+      IndexOptions indexOptions1,
+      IndexOptions indexOptions2,
+      boolean strictlyConsistent) {
+    if (strictlyConsistent == false
+        && (indexOptions1 == IndexOptions.NONE || indexOptions2 == IndexOptions.NONE)) {
+      return;
+    }
     if (indexOptions1 != indexOptions2) {
       throw new IllegalArgumentException(
           "cannot change field \""
@@ -258,7 +267,14 @@ public final class FieldInfo {
    * @throws IllegalArgumentException if they are not the same
    */
   static void verifySameDocValuesType(
-      String fieldName, DocValuesType docValuesType1, DocValuesType docValuesType2) {
+      String fieldName,
+      DocValuesType docValuesType1,
+      DocValuesType docValuesType2,
+      boolean strictlyConsistent) {
+    if (strictlyConsistent == false
+        && (docValuesType1 == DocValuesType.NONE || docValuesType2 == DocValuesType.NONE)) {
+      return;
+    }
     if (docValuesType1 != docValuesType2) {
       throw new IllegalArgumentException(
           "cannot change field \""
@@ -276,8 +292,11 @@ public final class FieldInfo {
    * @throws IllegalArgumentException if they are not the same
    */
   static void verifySameStoreTermVectors(
-      String fieldName, boolean storeTermVector1, boolean storeTermVector2) {
-    if (storeTermVector1 != storeTermVector2) {
+      String fieldName,
+      boolean storeTermVector1,
+      boolean storeTermVector2,
+      boolean strictlyConsistent) {
+    if (strictlyConsistent && storeTermVector1 != storeTermVector2) {
       throw new IllegalArgumentException(
           "cannot change field \""
               + fieldName
@@ -293,8 +312,9 @@ public final class FieldInfo {
    *
    * @throws IllegalArgumentException if they are not the same
    */
-  static void verifySameOmitNorms(String fieldName, boolean omitNorms1, boolean omitNorms2) {
-    if (omitNorms1 != omitNorms2) {
+  static void verifySameOmitNorms(
+      String fieldName, boolean omitNorms1, boolean omitNorms2, boolean strictlyConsistent) {
+    if (strictlyConsistent && omitNorms1 != omitNorms2) {
       throw new IllegalArgumentException(
           "cannot change field \""
               + fieldName
@@ -317,7 +337,11 @@ public final class FieldInfo {
       int numBytes1,
       int pointDimensionCount2,
       int indexDimensionCount2,
-      int numBytes2) {
+      int numBytes2,
+      boolean strictlyConsistent) {
+    if (strictlyConsistent == false && (pointDimensionCount1 == 0 || pointDimensionCount2 == 0)) {
+      return;
+    }
     if (pointDimensionCount1 != pointDimensionCount2
         || indexDimensionCount1 != indexDimensionCount2
         || numBytes1 != numBytes2) {
diff --git a/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java
index 76cf9bb654a..1df0f679375 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FieldInfos.java
@@ -31,11 +31,11 @@ import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.util.Objects;
 import java.util.Set;
 import java.util.stream.Collectors;
 import java.util.stream.StreamSupport;
 import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.Version;
 
 /**
  * Collection of {@link FieldInfo}s (accessible by number or by name).
@@ -172,13 +172,39 @@ public class FieldInfos implements Iterable<FieldInfo> {
     } else if (leaves.size() == 1) {
       return leaves.get(0).reader().getFieldInfos();
     } else {
-      final String softDeletesField =
-          leaves.stream()
-              .map(l -> l.reader().getFieldInfos().getSoftDeletesField())
-              .filter(Objects::nonNull)
-              .findAny()
-              .orElse(null);
-      final Builder builder = new Builder(new FieldNumbers(softDeletesField));
+      String softDeletesField = null;
+      int indexCreatedVersionMajor = -1;
+      for (LeafReaderContext leaf : reader.leaves()) {
+        final String leafSoftDeletesField = leaf.reader().getFieldInfos().softDeletesField;
+        if (leafSoftDeletesField != null) {
+          if (softDeletesField != null && softDeletesField.equals(leafSoftDeletesField) == false) {
+            throw new IllegalArgumentException(
+                "Cannot merge segments that have been created with different soft-deletes fields; found ["
+                    + softDeletesField
+                    + " and "
+                    + leafSoftDeletesField
+                    + "]");
+          }
+          softDeletesField = leafSoftDeletesField;
+        }
+        if (leaf.reader().getMetaData() != null) {
+          final int leafVersionMajor = leaf.reader().getMetaData().getCreatedVersionMajor();
+          if (indexCreatedVersionMajor != -1 && indexCreatedVersionMajor != leafVersionMajor) {
+            throw new IllegalArgumentException(
+                "Cannot merge segments that have been created in different major versions; found ["
+                    + indexCreatedVersionMajor
+                    + " and "
+                    + leafVersionMajor
+                    + "]");
+          }
+          indexCreatedVersionMajor = leafVersionMajor;
+        }
+      }
+      if (indexCreatedVersionMajor == -1) {
+        indexCreatedVersionMajor = Version.LATEST.major;
+      }
+      final Builder builder =
+          new Builder(new FieldNumbers(softDeletesField, indexCreatedVersionMajor));
       for (final LeafReaderContext ctx : leaves) {
         for (FieldInfo fieldInfo : ctx.reader().getFieldInfos()) {
           builder.add(fieldInfo);
@@ -339,8 +365,9 @@ public class FieldInfos implements Iterable<FieldInfo> {
 
     // The soft-deletes field from IWC to enforce a single soft-deletes field
     private final String softDeletesFieldName;
+    private final boolean strictlyConsistent;
 
-    FieldNumbers(String softDeletesFieldName) {
+    FieldNumbers(String softDeletesFieldName, int indexCreatedVersionMajor) {
       this.nameToNumber = new HashMap<>();
       this.numberToName = new HashMap<>();
       this.indexOptions = new HashMap<>();
@@ -350,6 +377,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
       this.omitNorms = new HashMap<>();
       this.storeTermVectors = new HashMap<>();
       this.softDeletesFieldName = softDeletesFieldName;
+      this.strictlyConsistent = indexCreatedVersionMajor >= 9;
     }
 
     /**
@@ -426,16 +454,17 @@ public class FieldInfos implements Iterable<FieldInfo> {
     private void verifySameSchema(FieldInfo fi) {
       String fieldName = fi.getName();
       IndexOptions currentOpts = this.indexOptions.get(fieldName);
-      verifySameIndexOptions(fieldName, currentOpts, fi.getIndexOptions());
+      verifySameIndexOptions(fieldName, currentOpts, fi.getIndexOptions(), strictlyConsistent);
       if (currentOpts != IndexOptions.NONE) {
         boolean curStoreTermVector = this.storeTermVectors.get(fieldName);
-        verifySameStoreTermVectors(fieldName, curStoreTermVector, fi.hasVectors());
+        verifySameStoreTermVectors(
+            fieldName, curStoreTermVector, fi.hasVectors(), strictlyConsistent);
         boolean curOmitNorms = this.omitNorms.get(fieldName);
-        verifySameOmitNorms(fieldName, curOmitNorms, fi.omitsNorms());
+        verifySameOmitNorms(fieldName, curOmitNorms, fi.omitsNorms(), strictlyConsistent);
       }
 
       DocValuesType currentDVType = docValuesType.get(fieldName);
-      verifySameDocValuesType(fieldName, currentDVType, fi.getDocValuesType());
+      verifySameDocValuesType(fieldName, currentDVType, fi.getDocValuesType(), strictlyConsistent);
 
       FieldDimensions dims = dimensions.get(fieldName);
       verifySamePointsOptions(
@@ -445,7 +474,8 @@ public class FieldInfos implements Iterable<FieldInfo> {
           dims.dimensionNumBytes,
           fi.getPointDimensionCount(),
           fi.getPointIndexDimensionCount(),
-          fi.getPointNumBytes());
+          fi.getPointNumBytes(),
+          strictlyConsistent);
 
       FieldVectorProperties props = vectorProps.get(fieldName);
       verifySameVectorOptions(
@@ -659,7 +689,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
     FieldInfo add(FieldInfo fi, long dvGen) {
       final FieldInfo curFi = fieldInfo(fi.getName());
       if (curFi != null) {
-        curFi.verifySameSchema(fi);
+        curFi.verifySameSchema(fi, globalFieldNumbers.strictlyConsistent);
         if (fi.attributes() != null) {
           fi.attributes().forEach((k, v) -> curFi.putAttribute(k, v));
         }
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index b303b14a50c..62bec18a934 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -1255,7 +1255,8 @@ public class IndexWriter
    * If this {@link SegmentInfos} has no global field number map the returned instance is empty
    */
   private FieldNumbers getFieldNumberMap() throws IOException {
-    final FieldNumbers map = new FieldNumbers(config.softDeletesField);
+    final FieldNumbers map =
+        new FieldNumbers(config.softDeletesField, segmentInfos.getIndexCreatedVersionMajor());
 
     for (SegmentCommitInfo info : segmentInfos) {
       FieldInfos fis = readFieldInfos(info);
@@ -6336,7 +6337,8 @@ public class IndexWriter
           public FieldInfosBuilder newFieldInfosBuilder(String softDeletesFieldName) {
             return new FieldInfosBuilder() {
               private FieldInfos.Builder builder =
-                  new FieldInfos.Builder(new FieldInfos.FieldNumbers(softDeletesFieldName));
+                  new FieldInfos.Builder(
+                      new FieldInfos.FieldNumbers(softDeletesFieldName, Version.LATEST.major));
 
               @Override
               public FieldInfosBuilder add(FieldInfo fi) {
diff --git a/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java b/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java
index 788da1b9ace..62fb344607a 100644
--- a/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java
@@ -112,8 +112,16 @@ public class ParallelLeafReader extends LeafReader {
             .findAny()
             .orElse(null);
     // TODO: make this read-only in a cleaner way?
+    final int indexCreatedVersionMajor =
+        completeReaderSet.stream()
+            .map(LeafReader::getMetaData)
+            .filter(Objects::nonNull)
+            .mapToInt(LeafMetaData::getCreatedVersionMajor)
+            .min()
+            .orElse(Version.LATEST.major);
     FieldInfos.Builder builder =
-        new FieldInfos.Builder(new FieldInfos.FieldNumbers(softDeletesField));
+        new FieldInfos.Builder(
+            new FieldInfos.FieldNumbers(softDeletesField, indexCreatedVersionMajor));
 
     Sort indexSort = null;
     int createdVersionMajor = -1;
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
index e2ca2ea2e36..ce23cad73d6 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
@@ -228,7 +228,8 @@ public class TestCodecs extends LuceneTestCase {
       terms[i] = new TermData(text, docs, null);
     }
 
-    final FieldInfos.Builder builder = new FieldInfos.Builder(new FieldInfos.FieldNumbers(null));
+    final FieldInfos.Builder builder =
+        new FieldInfos.Builder(new FieldInfos.FieldNumbers(null, Version.LATEST.major));
 
     final FieldData field = new FieldData("field", builder, terms, true, false);
     final FieldData[] fields = new FieldData[] {field};
@@ -290,7 +291,8 @@ public class TestCodecs extends LuceneTestCase {
   }
 
   public void testRandomPostings() throws Throwable {
-    final FieldInfos.Builder builder = new FieldInfos.Builder(new FieldInfos.FieldNumbers(null));
+    final FieldInfos.Builder builder =
+        new FieldInfos.Builder(new FieldInfos.FieldNumbers(null, Version.LATEST.major));
 
     final FieldData[] fields = new FieldData[NUM_FIELDS];
     for (int i = 0; i < NUM_FIELDS; i++) {
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDoc.java b/lucene/core/src/test/org/apache/lucene/index/TestDoc.java
index 90195ede06a..a25b30283a6 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestDoc.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestDoc.java
@@ -235,7 +235,7 @@ public class TestDoc extends LuceneTestCase {
             si,
             InfoStream.getDefault(),
             trackingDir,
-            new FieldInfos.FieldNumbers(null),
+            new FieldInfos.FieldNumbers(null, Version.LATEST.major),
             context);
 
     merger.merge();
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java b/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java
index f446613c04e..ab171a4e976 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestFieldInfos.java
@@ -22,13 +22,18 @@ import static org.hamcrest.CoreMatchers.sameInstance;
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.Iterator;
+import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.tests.analysis.MockAnalyzer;
 import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Version;
 
 public class TestFieldInfos extends LuceneTestCase {
 
@@ -243,7 +248,8 @@ public class TestFieldInfos extends LuceneTestCase {
   }
 
   public void testFieldNumbersAutoIncrement() {
-    FieldInfos.FieldNumbers fieldNumbers = new FieldInfos.FieldNumbers("softDeletes");
+    FieldInfos.FieldNumbers fieldNumbers =
+        new FieldInfos.FieldNumbers("softDeletes", Version.LATEST.major);
     for (int i = 0; i < 10; i++) {
       fieldNumbers.addOrGet(
           new FieldInfo(
@@ -304,4 +310,52 @@ public class TestFieldInfos extends LuceneTestCase {
                 false));
     assertEquals("Field numbers should reset after clear()", 0, idx);
   }
+
+  public void testRelaxConsistencyCheckForOldIndices() throws IOException {
+    try (Directory dir = newDirectory()) {
+      IndexWriterConfig config =
+          new IndexWriterConfig()
+              .setIndexCreatedVersionMajor(8)
+              .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
+      try (IndexWriter writer = new IndexWriter(dir, config)) {
+        // first segment with DV
+        Document d1 = new Document();
+        d1.add(new StringField("my_field", "first", Field.Store.NO));
+        d1.add(new BinaryDocValuesField("my_field", new BytesRef("first")));
+        writer.addDocument(d1);
+        writer.flush();
+        // second segment without DV
+        Document d2 = new Document();
+        d2.add(new StringField("my_field", "second", Field.Store.NO));
+        writer.addDocument(d2);
+        writer.flush();
+        writer.commit();
+      }
+      config = new IndexWriterConfig().setOpenMode(IndexWriterConfig.OpenMode.APPEND);
+      try (IndexWriter writer = new IndexWriter(dir, config)) {
+        // third segment with DV only
+        Document d3 = new Document();
+        d3.add(new BinaryDocValuesField("my_field", new BytesRef("third")));
+        writer.addDocument(d3);
+        writer.flush();
+        writer.commit();
+        // fails due to inconsistent DV type
+        expectThrows(
+            IllegalArgumentException.class,
+            () -> {
+              Document d = new Document();
+              d.add(new NumericDocValuesField("my_field", 3));
+              writer.addDocument(d);
+            });
+        // fails due to inconsistent index options
+        expectThrows(
+            IllegalArgumentException.class,
+            () -> {
+              Document d = new Document();
+              d.add(new TextField("my_field", "more", Field.Store.NO));
+              writer.addDocument(d);
+            });
+      }
+    }
+  }
 }
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestFieldsReader.java b/lucene/core/src/test/org/apache/lucene/index/TestFieldsReader.java
index c9744ec7581..0c1e3ef97f5 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestFieldsReader.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestFieldsReader.java
@@ -33,6 +33,7 @@ import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.tests.analysis.MockAnalyzer;
 import org.apache.lucene.tests.index.DocHelper;
 import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.util.Version;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 
@@ -45,7 +46,9 @@ public class TestFieldsReader extends LuceneTestCase {
   public static void beforeClass() throws Exception {
     testDoc = new Document();
     final String softDeletesFieldName = null;
-    fieldInfos = new FieldInfos.Builder(new FieldInfos.FieldNumbers(softDeletesFieldName));
+    fieldInfos =
+        new FieldInfos.Builder(
+            new FieldInfos.FieldNumbers(softDeletesFieldName, Version.LATEST.major));
     DocHelper.setupDoc(testDoc);
     for (IndexableField field : testDoc.getFields()) {
       IndexableFieldType ift = field.fieldType();
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java b/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java
index 63b697da2c9..677f27be7ed 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java
@@ -103,7 +103,7 @@ public class TestSegmentMerger extends LuceneTestCase {
             si,
             InfoStream.getDefault(),
             mergedDir,
-            new FieldInfos.FieldNumbers(null),
+            new FieldInfos.FieldNumbers(null, Version.LATEST.major),
             newIOContext(random(), new IOContext(new MergeInfo(-1, -1, false, -1))));
     MergeState mergeState = merger.merge();
     int docsMerged = mergeState.segmentInfo.maxDoc();