You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by so...@apache.org on 2021/01/21 15:02:43 UTC
[lucene-solr] branch master updated: LUCENE-9674: Use binary search
in VectorValues.advance()
This is an automated email from the ASF dual-hosted git repository.
sokolov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new e5a16f0 LUCENE-9674: Use binary search in VectorValues.advance()
e5a16f0 is described below
commit e5a16f0b0fdf69d0775afc86a641712ac2011089
Author: Anand <an...@gmail.com>
AuthorDate: Thu Jan 21 20:32:21 2021 +0530
LUCENE-9674: Use binary search in VectorValues.advance()
Lucene90VectorReader now implements advance() with binary search in place of prior linear scan
Co-authored-by: Anand Kotriwal <an...@amazon.com>
---
lucene/CHANGES.txt | 3 ++
.../codecs/lucene90/Lucene90VectorReader.java | 17 ++++++--
.../org/apache/lucene/index/TestVectorValues.java | 46 ++++++++++++++++++++++
3 files changed, 63 insertions(+), 3 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index fa1d09c..fce70e9 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -180,6 +180,9 @@ Improvements
* LUCENE-8982: Make NativeUnixDirectory pure java with FileChannel direct IO flag,
and rename to DirectIODirectory (Zach Chen, Uwe Schindler, Mike McCandless, Dawid Weiss).
+* LUCENE-9674: Implement faster advance on VectorValues using binary search.
+ (Anand Kotriwal, Mike Sokolov)
+
Bug fixes
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorReader.java
index cab9922..91c49e5 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorReader.java
@@ -22,6 +22,7 @@ import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.FloatBuffer;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
@@ -386,9 +387,19 @@ public final class Lucene90VectorReader extends VectorReader {
}
@Override
- public int advance(int target) throws IOException {
- // We could do better by log-binary search in ordToDoc, but this is never used
- return slowAdvance(target);
+ public int advance(int target) {
+ assert docID() < target;
+ ord = Arrays.binarySearch(fieldEntry.ordToDoc, ord + 1, fieldEntry.ordToDoc.length, target);
+ if (ord < 0) {
+ ord = -(ord + 1);
+ }
+ assert ord >= 0 && ord <= fieldEntry.ordToDoc.length;
+ if (ord == fieldEntry.ordToDoc.length) {
+ doc = NO_MORE_DOCS;
+ } else {
+ doc = fieldEntry.ordToDoc[ord];
+ }
+ return doc;
}
@Override
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestVectorValues.java b/lucene/core/src/test/org/apache/lucene/index/TestVectorValues.java
index f61b4b8..c66e7cf 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestVectorValues.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestVectorValues.java
@@ -815,4 +815,50 @@ public class TestVectorValues extends LuceneTestCase {
assertEquals(2, VectorValues.SearchStrategy.DOT_PRODUCT_HNSW.ordinal());
assertEquals(3, VectorValues.SearchStrategy.values().length);
}
+
+ public void testAdvance() throws Exception {
+ try (Directory dir = newDirectory()) {
+ try (IndexWriter w = new IndexWriter(dir, createIndexWriterConfig())) {
+ int numdocs = atLeast(1500);
+ String fieldName = "field";
+ for (int i = 0; i < numdocs; i++) {
+ Document doc = new Document();
+ // randomly add a vector field
+ if (random().nextInt(4) == 3) {
+ doc.add(new VectorField(fieldName, new float[4], SearchStrategy.NONE));
+ }
+ w.addDocument(doc);
+ }
+ w.forceMerge(1);
+ try (IndexReader reader = w.getReader()) {
+ LeafReader r = getOnlyLeafReader(reader);
+ VectorValues vectorValues = r.getVectorValues(fieldName);
+ int[] vectorDocs = new int[vectorValues.size() + 1];
+ int cur = -1;
+ while (++cur < vectorValues.size() + 1) {
+ vectorDocs[cur] = vectorValues.nextDoc();
+ if (cur != 0) {
+ assertTrue(vectorDocs[cur] > vectorDocs[cur - 1]);
+ }
+ }
+ vectorValues = r.getVectorValues(fieldName);
+ cur = -1;
+ for (int i = 0; i < numdocs; i++) {
+ // randomly advance to i
+ if (random().nextInt(4) == 3) {
+ while (vectorDocs[++cur] < i)
+ ;
+ assertEquals(vectorDocs[cur], vectorValues.advance(i));
+ assertEquals(vectorDocs[cur], vectorValues.docID());
+ if (vectorValues.docID() == NO_MORE_DOCS) {
+ break;
+ }
+ // make i equal to docid so that it is greater than docId in the next loop iteration
+ i = vectorValues.docID();
+ }
+ }
+ }
+ }
+ }
+ }
}