You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@solr.apache.org by "alessandrobenedetti (via GitHub)" <gi...@apache.org> on 2023/04/14 10:57:06 UTC

[GitHub] [solr] alessandrobenedetti commented on a diff in pull request #1435: SOLR-16674: Introduce dense vector byte encoding

alessandrobenedetti commented on code in PR #1435:
URL: https://github.com/apache/solr/pull/1435#discussion_r1166671404


##########
solr/core/src/java/org/apache/solr/schema/DenseVectorField.java:
##########
@@ -165,38 +183,69 @@ public void checkSchemaField(final SchemaField field) throws SolrException {
 
   @Override
   public List<IndexableField> createFields(SchemaField field, Object value) {
-    ArrayList<IndexableField> fields = new ArrayList<>();
-    float[] parsedVector;
     try {
-      parsedVector = parseVector(value);
+      ArrayList<IndexableField> fields = new ArrayList<>();
+      VectorBuilder vectorBuilder = getVectorBuilder(value, VectorBuilder.BuilderPhase.INDEX);
+
+      if (field.indexed()) {
+        fields.add(createField(field, vectorBuilder));
+      }
+      if (field.stored()) {
+        switch (vectorEncoding) {
+          case FLOAT32:
+            fields.ensureCapacity(vectorBuilder.getFloatVector().length + 1);
+            for (float vectorElement : vectorBuilder.getFloatVector()) {
+              fields.add(getStoredField(field, vectorElement));
+            }
+            break;
+          case BYTE:
+            fields.add(new StoredField(field.getName(), vectorBuilder.getByteVector()));
+            break;
+        }
+      }
+      return fields;
     } catch (RuntimeException e) {
       throw new SolrException(
           SolrException.ErrorCode.SERVER_ERROR,
-          "Error while creating field '"
-              + field
-              + "' from value '"
-              + value
-              + "', expected format:'[f1, f2, f3...fn]' e.g. [1.0, 3.4, 5.6]",
+          "Error while creating field '" + field + "' from value '" + value + "'",
           e);
     }
+  }
 
-    if (field.indexed()) {
-      fields.add(createField(field, parsedVector));
-    }
-    if (field.stored()) {
-      fields.ensureCapacity(parsedVector.length + 1);
-      for (float vectorElement : parsedVector) {
-        fields.add(getStoredField(field, vectorElement));
-      }
+  @Override
+  public IndexableField createField(SchemaField field, Object vectorValue) {
+    if (vectorValue == null) return null;
+    VectorBuilder vectorBuilder = (VectorBuilder) vectorValue;
+    switch (vectorEncoding) {
+      case BYTE:
+        return new KnnByteVectorField(
+            field.getName(), vectorBuilder.getByteVector(), similarityFunction);
+      case FLOAT32:
+        return new KnnFloatVectorField(
+            field.getName(), vectorBuilder.getFloatVector(), similarityFunction);
+      default:
+        throw new SolrException(
+            SolrException.ErrorCode.SERVER_ERROR,
+            "Unexpected state. Vector Encoding: " + vectorEncoding);
     }
-    return fields;
   }
 
   @Override
-  public IndexableField createField(SchemaField field, Object parsedVector) {
-    if (parsedVector == null) return null;
-    float[] typedVector = (float[]) parsedVector;
-    return new KnnVectorField(field.getName(), typedVector, similarityFunction);
+  public Object toObject(IndexableField f) {

Review Comment:
   can you refresh my memory on this method?



##########
solr/core/src/java/org/apache/solr/schema/DenseVectorField.java:
##########
@@ -205,53 +254,194 @@ public IndexableField createField(SchemaField field, Object parsedVector) {
    * org.apache.solr.handler.loader.CSVLoader} produces an ArrayList of String - {@link
    * org.apache.solr.handler.loader.JsonLoader} produces an ArrayList of Double - {@link
    * org.apache.solr.handler.loader.JavabinLoader} produces an ArrayList of Float
-   *
-   * @param inputValue - An {@link ArrayList} containing the elements of the vector
-   * @return the vector parsed
    */
-  float[] parseVector(Object inputValue) {
-    if (!(inputValue instanceof List)) {
-      throw new SolrException(
-          SolrException.ErrorCode.BAD_REQUEST,
-          "incorrect vector format."
-              + " The expected format is an array :'[f1,f2..f3]' where each element f is a float");
+  public VectorBuilder getVectorBuilder(Object inputValue, VectorBuilder.BuilderPhase phase) {
+    switch (vectorEncoding) {
+      case FLOAT32:
+        return new VectorBuilder.Float32VectorBuilder(dimension, inputValue, phase);
+      case BYTE:
+        return new VectorBuilder.ByteVectorBuilder(dimension, inputValue, phase);
+      default:
+        throw new SolrException(
+            SolrException.ErrorCode.SERVER_ERROR,
+            "Unexpected state. Vector Encoding: " + vectorEncoding);
     }
-    List<?> inputVector = (List<?>) inputValue;
-    if (inputVector.size() != dimension) {
-      throw new SolrException(
-          SolrException.ErrorCode.BAD_REQUEST,
-          "incorrect vector dimension."
-              + " The vector value has size "
-              + inputVector.size()
-              + " while it is expected a vector with size "
-              + dimension);
+  }
+
+  abstract static class VectorBuilder {

Review Comment:
   To discuss: could this be an external class, maybe a utility one?
   Is it perhaps more of a "VectorParser" than a builder?



##########
solr/core/src/java/org/apache/solr/schema/DenseVectorField.java:
##########
@@ -205,53 +254,194 @@ public IndexableField createField(SchemaField field, Object parsedVector) {
    * org.apache.solr.handler.loader.CSVLoader} produces an ArrayList of String - {@link
    * org.apache.solr.handler.loader.JsonLoader} produces an ArrayList of Double - {@link
    * org.apache.solr.handler.loader.JavabinLoader} produces an ArrayList of Float
-   *
-   * @param inputValue - An {@link ArrayList} containing the elements of the vector
-   * @return the vector parsed
    */
-  float[] parseVector(Object inputValue) {
-    if (!(inputValue instanceof List)) {
-      throw new SolrException(
-          SolrException.ErrorCode.BAD_REQUEST,
-          "incorrect vector format."
-              + " The expected format is an array :'[f1,f2..f3]' where each element f is a float");
+  public VectorBuilder getVectorBuilder(Object inputValue, VectorBuilder.BuilderPhase phase) {
+    switch (vectorEncoding) {
+      case FLOAT32:
+        return new VectorBuilder.Float32VectorBuilder(dimension, inputValue, phase);
+      case BYTE:
+        return new VectorBuilder.ByteVectorBuilder(dimension, inputValue, phase);
+      default:
+        throw new SolrException(
+            SolrException.ErrorCode.SERVER_ERROR,
+            "Unexpected state. Vector Encoding: " + vectorEncoding);
     }
-    List<?> inputVector = (List<?>) inputValue;
-    if (inputVector.size() != dimension) {
-      throw new SolrException(
-          SolrException.ErrorCode.BAD_REQUEST,
-          "incorrect vector dimension."
-              + " The vector value has size "
-              + inputVector.size()
-              + " while it is expected a vector with size "
-              + dimension);
+  }
+
+  abstract static class VectorBuilder {
+
+    public static enum BuilderPhase {
+      INDEX,
+      QUERY
+    }
+
+    protected BuilderPhase builderPhase;
+
+    protected int dimension;
+    protected Object inputValue;
+
+    public float[] getFloatVector() {
+      throw new UnsupportedOperationException("Requested wrong vector type");
+    }
+
+    public byte[] getByteVector() {
+      throw new UnsupportedOperationException("Requested wrong vector type");
+    }
+
+    protected void parseVector() {
+      switch (builderPhase) {
+        case INDEX:
+          parseIndexVector();
+          break;
+        case QUERY:
+          parseQueryVector();
+          break;
+      }
     }
 
-    float[] vector = new float[dimension];
-    if (inputVector.get(0) instanceof CharSequence) {
+    protected void parseIndexVector() {
+      if (!(inputValue instanceof List)) {
+        throw new SolrException(
+            SolrException.ErrorCode.BAD_REQUEST, "incorrect vector format. " + errorMessage());
+      }
+      List<?> inputVector = (List<?>) inputValue;
+      if (inputVector.size() != dimension) {
+        throw new SolrException(
+            SolrException.ErrorCode.BAD_REQUEST,
+            "incorrect vector dimension."
+                + " The vector value has size "
+                + inputVector.size()
+                + " while it is expected a vector with size "
+                + dimension);
+      }

Review Comment:
   maybe this duplicated code can be extracted?



##########
solr/core/src/java/org/apache/solr/schema/DenseVectorField.java:
##########
@@ -205,53 +254,194 @@ public IndexableField createField(SchemaField field, Object parsedVector) {
    * org.apache.solr.handler.loader.CSVLoader} produces an ArrayList of String - {@link
    * org.apache.solr.handler.loader.JsonLoader} produces an ArrayList of Double - {@link
    * org.apache.solr.handler.loader.JavabinLoader} produces an ArrayList of Float
-   *
-   * @param inputValue - An {@link ArrayList} containing the elements of the vector
-   * @return the vector parsed
    */
-  float[] parseVector(Object inputValue) {
-    if (!(inputValue instanceof List)) {
-      throw new SolrException(
-          SolrException.ErrorCode.BAD_REQUEST,
-          "incorrect vector format."
-              + " The expected format is an array :'[f1,f2..f3]' where each element f is a float");
+  public VectorBuilder getVectorBuilder(Object inputValue, VectorBuilder.BuilderPhase phase) {
+    switch (vectorEncoding) {
+      case FLOAT32:
+        return new VectorBuilder.Float32VectorBuilder(dimension, inputValue, phase);
+      case BYTE:
+        return new VectorBuilder.ByteVectorBuilder(dimension, inputValue, phase);
+      default:
+        throw new SolrException(
+            SolrException.ErrorCode.SERVER_ERROR,
+            "Unexpected state. Vector Encoding: " + vectorEncoding);
     }
-    List<?> inputVector = (List<?>) inputValue;
-    if (inputVector.size() != dimension) {
-      throw new SolrException(
-          SolrException.ErrorCode.BAD_REQUEST,
-          "incorrect vector dimension."
-              + " The vector value has size "
-              + inputVector.size()
-              + " while it is expected a vector with size "
-              + dimension);
+  }
+
+  abstract static class VectorBuilder {
+
+    public static enum BuilderPhase {
+      INDEX,
+      QUERY
+    }
+
+    protected BuilderPhase builderPhase;
+
+    protected int dimension;
+    protected Object inputValue;
+
+    public float[] getFloatVector() {
+      throw new UnsupportedOperationException("Requested wrong vector type");

Review Comment:
   Wrong? Why wrong? What should the 'correct' one be?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@solr.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@solr.apache.org
For additional commands, e-mail: issues-help@solr.apache.org