You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ju...@apache.org on 2021/11/10 16:18:09 UTC
[lucene] branch main updated: LUCENE-10228: Ensure
PerFieldKnnVectorsFormat uses right format name (#432)
This is an automated email from the ASF dual-hosted git repository.
julietibs pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 53586d4 LUCENE-10228: Ensure PerFieldKnnVectorsFormat uses right format name (#432)
53586d4 is described below
commit 53586d4231e8de2156483be3a9bf7766e81912f9
Author: Julie Tibshirani <ju...@apache.org>
AuthorDate: Wed Nov 10 08:18:01 2021 -0800
LUCENE-10228: Ensure PerFieldKnnVectorsFormat uses right format name (#432)
Before when creating a KnnVectorsWriter for merging, we consulted the existing
"PER_FIELD_SUFFIX_KEY" attribute to determine the format's per-field suffix.
This isn't correct since we could be using a new codec (that produces different
formats/ suffixes).
This commit modifies TestPerFieldDocValuesFormat#testMergeUsesNewFormat to
trigger the problem. Without the fix we it throws an error like
"java.nio.file.FileAlreadyExistsException: File
"_3_Lucene90HnswVectorsFormat_0.vem" was already written to."
---
.../codecs/perfield/PerFieldKnnVectorsFormat.java | 18 +++++-------------
.../codecs/perfield/TestPerFieldKnnVectorsFormat.java | 16 ++++++++++++----
2 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldKnnVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldKnnVectorsFormat.java
index 0e5cb00..1ec03da 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldKnnVectorsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldKnnVectorsFormat.java
@@ -123,25 +123,17 @@ public abstract class PerFieldKnnVectorsFormat extends KnnVectorsFormat {
final String formatName = format.getName();
field.putAttribute(PER_FIELD_FORMAT_KEY, formatName);
- Integer suffix = null;
+ Integer suffix;
WriterAndSuffix writerAndSuffix = formats.get(format);
if (writerAndSuffix == null) {
// First time we are seeing this format; create a new instance
- String suffixAtt = field.getAttribute(PER_FIELD_SUFFIX_KEY);
- if (suffixAtt != null) {
- suffix = Integer.valueOf(suffixAtt);
- }
-
+ suffix = suffixes.get(formatName);
if (suffix == null) {
- // bump the suffix
- suffix = suffixes.get(formatName);
- if (suffix == null) {
- suffix = 0;
- } else {
- suffix = suffix + 1;
- }
+ suffix = 0;
+ } else {
+ suffix = suffix + 1;
}
suffixes.put(formatName, suffix);
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java
index bc0eae5..31371a5 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java
@@ -123,20 +123,27 @@ public class TestPerFieldKnnVectorsFormat extends BaseKnnVectorsFormatTestCase {
for (int i = 0; i < 3; i++) {
Document doc = new Document();
doc.add(newTextField("id", "1", Field.Store.YES));
- doc.add(new KnnVectorField("field", new float[] {1, 2, 3}));
+ doc.add(new KnnVectorField("field1", new float[] {1, 2, 3}));
+ doc.add(new KnnVectorField("field2", new float[] {1, 2, 3}));
iw.addDocument(doc);
iw.commit();
}
}
IndexWriterConfig newConfig = newIndexWriterConfig(new MockAnalyzer(random()));
- WriteRecordingKnnVectorsFormat newFormat =
+ WriteRecordingKnnVectorsFormat format1 =
+ new WriteRecordingKnnVectorsFormat(TestUtil.getDefaultKnnVectorsFormat());
+ WriteRecordingKnnVectorsFormat format2 =
new WriteRecordingKnnVectorsFormat(TestUtil.getDefaultKnnVectorsFormat());
newConfig.setCodec(
new AssertingCodec() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
- return newFormat;
+ if ("field1".equals(field)) {
+ return format1;
+ } else {
+ return format2;
+ }
}
});
@@ -145,7 +152,8 @@ public class TestPerFieldKnnVectorsFormat extends BaseKnnVectorsFormatTestCase {
}
// Check that the new format was used while merging
- MatcherAssert.assertThat(newFormat.fieldsWritten, equalTo(Set.of("field")));
+ MatcherAssert.assertThat(format1.fieldsWritten, equalTo(Set.of("field1")));
+ MatcherAssert.assertThat(format2.fieldsWritten, equalTo(Set.of("field2")));
}
}