You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by to...@apache.org on 2021/12/12 06:34:27 UTC

[lucene] branch branch_9x updated: LUCENE-10309: Minimum KnnVector codec support in Luke (#535)

This is an automated email from the ASF dual-hosted git repository.

tomoko pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new e0a6e1c  LUCENE-10309: Minimum KnnVector codec support in Luke (#535)
e0a6e1c is described below

commit e0a6e1c662e4e15937693c90b0dd5a277ca355ba
Author: Tomoko Uchida <to...@gmail.com>
AuthorDate: Sun Dec 12 15:31:18 2021 +0900

    LUCENE-10309: Minimum KnnVector codec support in Luke (#535)
---
 .../desktop/components/DocumentsPanelProvider.java | 29 +++++++++++++++++++---
 .../luke/models/documents/DocumentField.java       | 18 ++++++++++++++
 .../luke/models/documents/TestDocumentsImpl.java   |  4 +++
 3 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/DocumentsPanelProvider.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/DocumentsPanelProvider.java
index d7581f6..613cca4 100644
--- a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/DocumentsPanelProvider.java
+++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/DocumentsPanelProvider.java
@@ -35,6 +35,7 @@ import java.io.IOException;
 import java.math.BigDecimal;
 import java.math.BigInteger;
 import java.util.List;
+import java.util.Locale;
 import java.util.Objects;
 import java.util.Optional;
 import javax.swing.BorderFactory;
@@ -154,7 +155,7 @@ public final class DocumentsPanelProvider implements DocumentsTabOperator {
     this.tableHeaderRenderer =
         new HelpHeaderRenderer(
             "About Flags",
-            "Format: IdfpoNPSB#txxVDtxxxxTx/x",
+            "Format: IdfpoNPSB#txxVDtxxxxTx/xKxxxx/xxx",
             createFlagsHelpDialog(),
             helpDialogFactory);
 
@@ -173,7 +174,8 @@ public final class DocumentsPanelProvider implements DocumentsTabOperator {
           "#txx - numeric stored values(type, precision)",
           "V - term vectors",
           "Dtxxxxx - doc values(type)",
-          "Tx/x - point values(num bytes/dimension)"
+          "Tx/x - point values(num bytes/dimension)",
+          "Kxxxx/xxx - knn vector values(dimension/similarity)"
         };
     JList<String> list = new JList<>(values);
     return new JScrollPane(list);
@@ -1049,7 +1051,7 @@ public final class DocumentsPanelProvider implements DocumentsTabOperator {
 
     enum Column implements TableColumnInfo {
       FIELD("Field", 0, String.class, 150),
-      FLAGS("Flags", 1, String.class, 200),
+      FLAGS("Flags", 1, String.class, 220),
       NORM("Norm", 2, Long.class, 80),
       VALUE("Value", 3, String.class, 500);
 
@@ -1227,6 +1229,27 @@ public final class DocumentsPanelProvider implements DocumentsTabOperator {
         sb.append("/");
         sb.append(f.getPointDimensionCount());
       }
+      // knn vector values
+      if (f.getVectorDimension() == 0) {
+        sb.append("---------");
+      } else {
+        sb.append("K");
+        sb.append(String.format(Locale.ENGLISH, "%04d", f.getVectorDimension()));
+        sb.append("/");
+        switch (f.getVectorSimilarity()) {
+          case COSINE:
+            sb.append("cos");
+            break;
+          case DOT_PRODUCT:
+            sb.append("dot");
+            break;
+          case EUCLIDEAN:
+            sb.append("euc");
+            break;
+          default:
+            sb.append("???");
+        }
+      }
       return sb.toString();
     }
 
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/models/documents/DocumentField.java b/lucene/luke/src/java/org/apache/lucene/luke/models/documents/DocumentField.java
index a27c8db..460d142 100644
--- a/lucene/luke/src/java/org/apache/lucene/luke/models/documents/DocumentField.java
+++ b/lucene/luke/src/java/org/apache/lucene/luke/models/documents/DocumentField.java
@@ -26,6 +26,7 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.MultiDocValues;
 import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.VectorSimilarityFunction;
 import org.apache.lucene.util.BytesRef;
 
 /** Holder for a document field's information and data. */
@@ -54,6 +55,10 @@ public final class DocumentField {
   private int pointDimensionCount;
   private int pointNumBytes;
 
+  // knn vector values
+  private int vectorDimension;
+  private VectorSimilarityFunction vectorSimilarity;
+
   static DocumentField of(FieldInfo finfo, IndexReader reader, int docId) throws IOException {
     return of(finfo, null, reader, docId);
   }
@@ -84,6 +89,9 @@ public final class DocumentField {
     dfield.pointDimensionCount = finfo.getPointDimensionCount();
     dfield.pointNumBytes = finfo.getPointNumBytes();
 
+    dfield.vectorDimension = finfo.getVectorDimension();
+    dfield.vectorSimilarity = finfo.getVectorSimilarityFunction();
+
     if (field != null) {
       dfield.isStored = field.fieldType().stored();
       dfield.stringValue = field.stringValue();
@@ -148,6 +156,14 @@ public final class DocumentField {
     return pointNumBytes;
   }
 
+  public int getVectorDimension() {
+    return vectorDimension;
+  }
+
+  public VectorSimilarityFunction getVectorSimilarity() {
+    return vectorSimilarity;
+  }
+
   @Override
   public String toString() {
     return "DocumentField{"
@@ -164,6 +180,8 @@ public final class DocumentField {
         + dvType
         + ", pointDimensionCount="
         + pointDimensionCount
+        + ", vectorDimension="
+        + vectorDimension
         + '}';
   }
 
diff --git a/lucene/luke/src/test/org/apache/lucene/luke/models/documents/TestDocumentsImpl.java b/lucene/luke/src/test/org/apache/lucene/luke/models/documents/TestDocumentsImpl.java
index ddddbef..8162b57 100644
--- a/lucene/luke/src/test/org/apache/lucene/luke/models/documents/TestDocumentsImpl.java
+++ b/lucene/luke/src/test/org/apache/lucene/luke/models/documents/TestDocumentsImpl.java
@@ -68,6 +68,7 @@ public class TestDocumentsImpl extends DocumentsTestBase {
     assertEquals(DocValuesType.NONE, f1.getDvType());
     assertEquals(0, f1.getPointDimensionCount());
     assertEquals(0, f1.getPointNumBytes());
+    assertEquals(0, f1.getVectorDimension());
 
     DocumentField f2 = fields.get(1);
     assertEquals("author", f2.getName());
@@ -83,6 +84,7 @@ public class TestDocumentsImpl extends DocumentsTestBase {
     assertEquals(DocValuesType.NONE, f2.getDvType());
     assertEquals(0, f2.getPointDimensionCount());
     assertEquals(0, f2.getPointNumBytes());
+    assertEquals(0, f2.getVectorDimension());
 
     DocumentField f3 = fields.get(2);
     assertEquals("text", f3.getName());
@@ -98,6 +100,7 @@ public class TestDocumentsImpl extends DocumentsTestBase {
     assertEquals(DocValuesType.NONE, f3.getDvType());
     assertEquals(0, f3.getPointDimensionCount());
     assertEquals(0, f3.getPointNumBytes());
+    assertEquals(0, f3.getVectorDimension());
 
     DocumentField f4 = fields.get(3);
     assertEquals("subject", f4.getName());
@@ -113,6 +116,7 @@ public class TestDocumentsImpl extends DocumentsTestBase {
     assertEquals(DocValuesType.SORTED_SET, f4.getDvType());
     assertEquals(0, f4.getPointDimensionCount());
     assertEquals(0, f4.getPointNumBytes());
+    assertEquals(0, f4.getVectorDimension());
 
     DocumentField f5 = fields.get(4);
     assertEquals("downloads", f5.getName());