You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by to...@apache.org on 2021/12/12 06:34:27 UTC
[lucene] branch branch_9x updated: LUCENE-10309: Minimum KnnVector codec support in Luke (#535)
This is an automated email from the ASF dual-hosted git repository.
tomoko pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new e0a6e1c LUCENE-10309: Minimum KnnVector codec support in Luke (#535)
e0a6e1c is described below
commit e0a6e1c662e4e15937693c90b0dd5a277ca355ba
Author: Tomoko Uchida <to...@gmail.com>
AuthorDate: Sun Dec 12 15:31:18 2021 +0900
LUCENE-10309: Minimum KnnVector codec support in Luke (#535)
---
.../desktop/components/DocumentsPanelProvider.java | 29 +++++++++++++++++++---
.../luke/models/documents/DocumentField.java | 18 ++++++++++++++
.../luke/models/documents/TestDocumentsImpl.java | 4 +++
3 files changed, 48 insertions(+), 3 deletions(-)
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/DocumentsPanelProvider.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/DocumentsPanelProvider.java
index d7581f6..613cca4 100644
--- a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/DocumentsPanelProvider.java
+++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/DocumentsPanelProvider.java
@@ -35,6 +35,7 @@ import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.List;
+import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import javax.swing.BorderFactory;
@@ -154,7 +155,7 @@ public final class DocumentsPanelProvider implements DocumentsTabOperator {
this.tableHeaderRenderer =
new HelpHeaderRenderer(
"About Flags",
- "Format: IdfpoNPSB#txxVDtxxxxTx/x",
+ "Format: IdfpoNPSB#txxVDtxxxxTx/xKxxxx/xxx",
createFlagsHelpDialog(),
helpDialogFactory);
@@ -173,7 +174,8 @@ public final class DocumentsPanelProvider implements DocumentsTabOperator {
"#txx - numeric stored values(type, precision)",
"V - term vectors",
"Dtxxxxx - doc values(type)",
- "Tx/x - point values(num bytes/dimension)"
+ "Tx/x - point values(num bytes/dimension)",
+ "Kxxxx/xxx - knn vector values(dimension/similarity)"
};
JList<String> list = new JList<>(values);
return new JScrollPane(list);
@@ -1049,7 +1051,7 @@ public final class DocumentsPanelProvider implements DocumentsTabOperator {
enum Column implements TableColumnInfo {
FIELD("Field", 0, String.class, 150),
- FLAGS("Flags", 1, String.class, 200),
+ FLAGS("Flags", 1, String.class, 220),
NORM("Norm", 2, Long.class, 80),
VALUE("Value", 3, String.class, 500);
@@ -1227,6 +1229,27 @@ public final class DocumentsPanelProvider implements DocumentsTabOperator {
sb.append("/");
sb.append(f.getPointDimensionCount());
}
+ // knn vector values
+ if (f.getVectorDimension() == 0) {
+ sb.append("---------");
+ } else {
+ sb.append("K");
+ sb.append(String.format(Locale.ENGLISH, "%04d", f.getVectorDimension()));
+ sb.append("/");
+ switch (f.getVectorSimilarity()) {
+ case COSINE:
+ sb.append("cos");
+ break;
+ case DOT_PRODUCT:
+ sb.append("dot");
+ break;
+ case EUCLIDEAN:
+ sb.append("euc");
+ break;
+ default:
+ sb.append("???");
+ }
+ }
return sb.toString();
}
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/models/documents/DocumentField.java b/lucene/luke/src/java/org/apache/lucene/luke/models/documents/DocumentField.java
index a27c8db..460d142 100644
--- a/lucene/luke/src/java/org/apache/lucene/luke/models/documents/DocumentField.java
+++ b/lucene/luke/src/java/org/apache/lucene/luke/models/documents/DocumentField.java
@@ -26,6 +26,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.BytesRef;
/** Holder for a document field's information and data. */
@@ -54,6 +55,10 @@ public final class DocumentField {
private int pointDimensionCount;
private int pointNumBytes;
+ // knn vector values
+ private int vectorDimension;
+ private VectorSimilarityFunction vectorSimilarity;
+
static DocumentField of(FieldInfo finfo, IndexReader reader, int docId) throws IOException {
return of(finfo, null, reader, docId);
}
@@ -84,6 +89,9 @@ public final class DocumentField {
dfield.pointDimensionCount = finfo.getPointDimensionCount();
dfield.pointNumBytes = finfo.getPointNumBytes();
+ dfield.vectorDimension = finfo.getVectorDimension();
+ dfield.vectorSimilarity = finfo.getVectorSimilarityFunction();
+
if (field != null) {
dfield.isStored = field.fieldType().stored();
dfield.stringValue = field.stringValue();
@@ -148,6 +156,14 @@ public final class DocumentField {
return pointNumBytes;
}
+ public int getVectorDimension() {
+ return vectorDimension;
+ }
+
+ public VectorSimilarityFunction getVectorSimilarity() {
+ return vectorSimilarity;
+ }
+
@Override
public String toString() {
return "DocumentField{"
@@ -164,6 +180,8 @@ public final class DocumentField {
+ dvType
+ ", pointDimensionCount="
+ pointDimensionCount
+ + ", vectorDimension="
+ + vectorDimension
+ '}';
}
diff --git a/lucene/luke/src/test/org/apache/lucene/luke/models/documents/TestDocumentsImpl.java b/lucene/luke/src/test/org/apache/lucene/luke/models/documents/TestDocumentsImpl.java
index ddddbef..8162b57 100644
--- a/lucene/luke/src/test/org/apache/lucene/luke/models/documents/TestDocumentsImpl.java
+++ b/lucene/luke/src/test/org/apache/lucene/luke/models/documents/TestDocumentsImpl.java
@@ -68,6 +68,7 @@ public class TestDocumentsImpl extends DocumentsTestBase {
assertEquals(DocValuesType.NONE, f1.getDvType());
assertEquals(0, f1.getPointDimensionCount());
assertEquals(0, f1.getPointNumBytes());
+ assertEquals(0, f1.getVectorDimension());
DocumentField f2 = fields.get(1);
assertEquals("author", f2.getName());
@@ -83,6 +84,7 @@ public class TestDocumentsImpl extends DocumentsTestBase {
assertEquals(DocValuesType.NONE, f2.getDvType());
assertEquals(0, f2.getPointDimensionCount());
assertEquals(0, f2.getPointNumBytes());
+ assertEquals(0, f2.getVectorDimension());
DocumentField f3 = fields.get(2);
assertEquals("text", f3.getName());
@@ -98,6 +100,7 @@ public class TestDocumentsImpl extends DocumentsTestBase {
assertEquals(DocValuesType.NONE, f3.getDvType());
assertEquals(0, f3.getPointDimensionCount());
assertEquals(0, f3.getPointNumBytes());
+ assertEquals(0, f3.getVectorDimension());
DocumentField f4 = fields.get(3);
assertEquals("subject", f4.getName());
@@ -113,6 +116,7 @@ public class TestDocumentsImpl extends DocumentsTestBase {
assertEquals(DocValuesType.SORTED_SET, f4.getDvType());
assertEquals(0, f4.getPointDimensionCount());
assertEquals(0, f4.getPointNumBytes());
+ assertEquals(0, f4.getVectorDimension());
DocumentField f5 = fields.get(4);
assertEquals("downloads", f5.getName());