You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ju...@apache.org on 2021/11/10 16:18:09 UTC

[lucene] branch main updated: LUCENE-10228: Ensure PerFieldKnnVectorsFormat uses right format name (#432)

This is an automated email from the ASF dual-hosted git repository.

julietibs pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 53586d4  LUCENE-10228: Ensure PerFieldKnnVectorsFormat uses right format name (#432)
53586d4 is described below

commit 53586d4231e8de2156483be3a9bf7766e81912f9
Author: Julie Tibshirani <ju...@apache.org>
AuthorDate: Wed Nov 10 08:18:01 2021 -0800

    LUCENE-10228: Ensure PerFieldKnnVectorsFormat uses right format name (#432)
    
    Before when creating a KnnVectorsWriter for merging, we consulted the existing
    "PER_FIELD_SUFFIX_KEY" attribute to determine the format's per-field suffix.
    This isn't correct since we could be using a new codec (that produces different
    formats/ suffixes).
    
    This commit modifies TestPerFieldDocValuesFormat#testMergeUsesNewFormat to
    trigger the problem. Without the fix we it throws an error like
    "java.nio.file.FileAlreadyExistsException: File
    "_3_Lucene90HnswVectorsFormat_0.vem" was already written to."
---
 .../codecs/perfield/PerFieldKnnVectorsFormat.java      | 18 +++++-------------
 .../codecs/perfield/TestPerFieldKnnVectorsFormat.java  | 16 ++++++++++++----
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldKnnVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldKnnVectorsFormat.java
index 0e5cb00..1ec03da 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldKnnVectorsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldKnnVectorsFormat.java
@@ -123,25 +123,17 @@ public abstract class PerFieldKnnVectorsFormat extends KnnVectorsFormat {
       final String formatName = format.getName();
 
       field.putAttribute(PER_FIELD_FORMAT_KEY, formatName);
-      Integer suffix = null;
+      Integer suffix;
 
       WriterAndSuffix writerAndSuffix = formats.get(format);
       if (writerAndSuffix == null) {
         // First time we are seeing this format; create a new instance
 
-        String suffixAtt = field.getAttribute(PER_FIELD_SUFFIX_KEY);
-        if (suffixAtt != null) {
-          suffix = Integer.valueOf(suffixAtt);
-        }
-
+        suffix = suffixes.get(formatName);
         if (suffix == null) {
-          // bump the suffix
-          suffix = suffixes.get(formatName);
-          if (suffix == null) {
-            suffix = 0;
-          } else {
-            suffix = suffix + 1;
-          }
+          suffix = 0;
+        } else {
+          suffix = suffix + 1;
         }
         suffixes.put(formatName, suffix);
 
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java
index bc0eae5..31371a5 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java
@@ -123,20 +123,27 @@ public class TestPerFieldKnnVectorsFormat extends BaseKnnVectorsFormatTestCase {
         for (int i = 0; i < 3; i++) {
           Document doc = new Document();
           doc.add(newTextField("id", "1", Field.Store.YES));
-          doc.add(new KnnVectorField("field", new float[] {1, 2, 3}));
+          doc.add(new KnnVectorField("field1", new float[] {1, 2, 3}));
+          doc.add(new KnnVectorField("field2", new float[] {1, 2, 3}));
           iw.addDocument(doc);
           iw.commit();
         }
       }
 
       IndexWriterConfig newConfig = newIndexWriterConfig(new MockAnalyzer(random()));
-      WriteRecordingKnnVectorsFormat newFormat =
+      WriteRecordingKnnVectorsFormat format1 =
+          new WriteRecordingKnnVectorsFormat(TestUtil.getDefaultKnnVectorsFormat());
+      WriteRecordingKnnVectorsFormat format2 =
           new WriteRecordingKnnVectorsFormat(TestUtil.getDefaultKnnVectorsFormat());
       newConfig.setCodec(
           new AssertingCodec() {
             @Override
             public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
-              return newFormat;
+              if ("field1".equals(field)) {
+                return format1;
+              } else {
+                return format2;
+              }
             }
           });
 
@@ -145,7 +152,8 @@ public class TestPerFieldKnnVectorsFormat extends BaseKnnVectorsFormatTestCase {
       }
 
       // Check that the new format was used while merging
-      MatcherAssert.assertThat(newFormat.fieldsWritten, equalTo(Set.of("field")));
+      MatcherAssert.assertThat(format1.fieldsWritten, equalTo(Set.of("field1")));
+      MatcherAssert.assertThat(format2.fieldsWritten, equalTo(Set.of("field2")));
     }
   }