You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by so...@apache.org on 2022/09/01 13:54:10 UTC

[lucene] branch main updated (fd86968fee5 -> 0462a0ad73c)

This is an automated email from the ASF dual-hosted git repository.

sokolov pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


    from fd86968fee5 remove a link to old Jira in README.
     new 1649964f076 Forward-port CHANGES entry for quantized HNSW vectors from 9.x branch
     new 0462a0ad73c fixed index order needed for TestKnnVectorQuery.testScoreEuclidean (#11732)

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 lucene/CHANGES.txt                                 |  2 +-
 .../apache/lucene/search/TestKnnVectorQuery.java   | 44 +++++++++++++++++-----
 2 files changed, 35 insertions(+), 11 deletions(-)


[lucene] 01/02: Forward-port CHANGES entry for quantized HNSW vectors from 9.x branch

Posted by so...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sokolov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git

commit 1649964f076602d9b79293df8113ab42c6dc80ee
Author: Michael Sokolov <so...@amazon.com>
AuthorDate: Wed Aug 31 10:06:22 2022 -0400

    Forward-port CHANGES entry for quantized HNSW vectors from 9.x branch
---
 lucene/CHANGES.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index ff2713ba6a5..02956ce0b37 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -85,7 +85,7 @@ Other
 
 API Changes
 ---------------------
-(No changes)
+* LUCENE-10577: Add VectorEncoding to enable byte-encoded HNSW vectors (Michael Sokolov, Julie Tibshirani)
 
 New Features
 ---------------------


[lucene] 02/02: fixed index order needed for TestKnnVectorQuery.testScoreEuclidean (#11732)

Posted by so...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sokolov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git

commit 0462a0ad73c45b56fcc40b3bc9ae24e4bf010363
Author: Michael Sokolov <so...@falutin.net>
AuthorDate: Thu Sep 1 09:52:48 2022 -0400

    fixed index order needed for TestKnnVectorQuery.testScoreEuclidean (#11732)
---
 .../apache/lucene/search/TestKnnVectorQuery.java   | 44 +++++++++++++++++-----
 1 file changed, 34 insertions(+), 10 deletions(-)

diff --git a/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java
index 72b6bd348c7..eae46307994 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestKnnVectorQuery.java
@@ -245,7 +245,7 @@ public class TestKnnVectorQuery extends LuceneTestCase {
     for (int j = 0; j < 5; j++) {
       vectors[j] = new float[] {j, j};
     }
-    try (Directory d = getIndexStore("field", 1, vectors);
+    try (Directory d = getStableIndexStore("field", vectors);
         IndexReader reader = DirectoryReader.open(d)) {
       IndexSearcher searcher = new IndexSearcher(reader);
       KnnVectorQuery query = new KnnVectorQuery("field", new float[] {2, 3}, 3);
@@ -756,13 +756,8 @@ public class TestKnnVectorQuery extends LuceneTestCase {
     }
   }
 
-  private Directory getIndexStore(String field, float[]... contents) throws IOException {
-    return getIndexStore(field, -1, contents);
-  }
-
   /** Creates a new directory and adds documents with the given vectors as kNN vector fields */
-  private Directory getIndexStore(String field, int forceMerge, float[]... contents)
-      throws IOException {
+  private Directory getIndexStore(String field, float[]... contents) throws IOException {
     Directory indexStore = newDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore);
     VectorEncoding encoding = randomVectorEncoding();
@@ -786,13 +781,42 @@ public class TestKnnVectorQuery extends LuceneTestCase {
       doc.add(new StringField("other", "value", Field.Store.NO));
       writer.addDocument(doc);
     }
-    if (forceMerge > 0) {
-      writer.forceMerge(forceMerge);
-    }
     writer.close();
     return indexStore;
   }
 
+  /**
+   * Creates a new directory and adds documents with the given vectors as kNN vector fields,
+   * preserving the order of the added documents.
+   */
+  private Directory getStableIndexStore(String field, float[]... contents) throws IOException {
+    Directory indexStore = newDirectory();
+    try (IndexWriter writer = new IndexWriter(indexStore, newIndexWriterConfig())) {
+      VectorEncoding encoding = randomVectorEncoding();
+      for (int i = 0; i < contents.length; ++i) {
+        Document doc = new Document();
+        if (encoding == VectorEncoding.BYTE) {
+          BytesRef v = new BytesRef(new byte[contents[i].length]);
+          for (int j = 0; j < v.length; j++) {
+            v.bytes[j] = (byte) contents[i][j];
+          }
+          doc.add(new KnnVectorField(field, v, EUCLIDEAN));
+        } else {
+          doc.add(new KnnVectorField(field, contents[i]));
+        }
+        doc.add(new StringField("id", "id" + i, Field.Store.YES));
+        writer.addDocument(doc);
+      }
+      // Add some documents without a vector
+      for (int i = 0; i < 5; i++) {
+        Document doc = new Document();
+        doc.add(new StringField("other", "value", Field.Store.NO));
+        writer.addDocument(doc);
+      }
+    }
+    return indexStore;
+  }
+
   private void assertMatches(IndexSearcher searcher, Query q, int expectedMatches)
       throws IOException {
     ScoreDoc[] result = searcher.search(q, 1000).scoreDocs;