You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by em...@apache.org on 2019/07/20 05:31:59 UTC

[arrow] branch master updated: ARROW-5920: [Java] Support sort & compare for all variable width vectors

This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 4a3822d  ARROW-5920: [Java] Support sort & compare for all variable width vectors
4a3822d is described below

commit 4a3822d2fe20e1ac613b82518f2fb6dd758dab01
Author: liyafan82 <fa...@foxmail.com>
AuthorDate: Fri Jul 19 22:31:23 2019 -0700

    ARROW-5920: [Java] Support sort & compare for all variable width vectors
    
    All types of variable-width vectors can reuse the same comparator for sorting & searching.
    
    Author: liyafan82 <fa...@foxmail.com>
    
    Closes #4860 from liyafan82/fly_0712_varsort and squashes the following commits:
    
    cbd8c3fbb <liyafan82>   Provide a utility to create the default comparator
    46d2c1171 <liyafan82>  Support sort & compare for all variable width vectors
---
 .../algorithm/sort/DefaultVectorComparators.java   | 57 +++++++++++++++++-----
 .../arrow/algorithm/search/TestVectorSearcher.java | 13 +++--
 .../sort/TestFixedWidthInPlaceVectorSorter.java    |  2 +-
 .../sort/TestFixedWidthOutOfPlaceVectorSorter.java | 12 ++---
 .../TestVariableWidthOutOfPlaceVectorSorter.java   |  4 +-
 5 files changed, 63 insertions(+), 25 deletions(-)

diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
index 2dfa0aa..a2d2f78 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
@@ -17,14 +17,17 @@
 
 package org.apache.arrow.algorithm.sort;
 
+import static org.apache.arrow.vector.BaseVariableWidthVector.OFFSET_WIDTH;
+
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
 import org.apache.arrow.vector.BigIntVector;
 import org.apache.arrow.vector.Float4Vector;
 import org.apache.arrow.vector.Float8Vector;
 import org.apache.arrow.vector.IntVector;
 import org.apache.arrow.vector.SmallIntVector;
 import org.apache.arrow.vector.TinyIntVector;
-import org.apache.arrow.vector.VarCharVector;
-import org.apache.arrow.vector.holders.NullableVarCharHolder;
+import org.apache.arrow.vector.ValueVector;
 
 /**
  * Default comparator implementations for different types of vectors.
@@ -32,6 +35,34 @@ import org.apache.arrow.vector.holders.NullableVarCharHolder;
 public class DefaultVectorComparators {
 
   /**
+   * Create the default comparator for the vector.
+   * @param vector the vector.
+   * @param <T> the vector type.
+   * @return the default comparator.
+   */
+  public static <T extends ValueVector> VectorValueComparator<T> createDefaultComparator(T vector) {
+    if (vector instanceof BaseFixedWidthVector) {
+      if (vector instanceof TinyIntVector) {
+        return (VectorValueComparator<T>) new ByteComparator();
+      } else if (vector instanceof SmallIntVector) {
+        return (VectorValueComparator<T>) new ShortComparator();
+      } else if (vector instanceof IntVector) {
+        return (VectorValueComparator<T>) new IntComparator();
+      } else if (vector instanceof BigIntVector) {
+        return (VectorValueComparator<T>) new LongComparator();
+      } else if (vector instanceof Float4Vector) {
+        return (VectorValueComparator<T>) new Float4Comparator();
+      } else if (vector instanceof Float8Vector) {
+        return (VectorValueComparator<T>) new Float8Comparator();
+      }
+    } else if (vector instanceof BaseVariableWidthVector) {
+      return (VectorValueComparator<T>) new VariableWidthComparator();
+    }
+
+    throw new IllegalArgumentException("No default comparator for " + vector.getClass().getCanonicalName());
+  }
+
+  /**
    * Default comparator for bytes.
    * The comparison is based on values, with null comes first.
    */
@@ -169,26 +200,26 @@ public class DefaultVectorComparators {
   }
 
   /**
-   * Default comparator for varchars.
+   * Default comparator for {@link org.apache.arrow.vector.BaseVariableWidthVector}.
    * The comparison is in lexicographic order, with null comes first.
    */
-  public static class VarCharComparator extends VectorValueComparator<VarCharVector> {
-
-    private NullableVarCharHolder holder1 = new NullableVarCharHolder();
-    private NullableVarCharHolder holder2 = new NullableVarCharHolder();
+  public static class VariableWidthComparator extends VectorValueComparator<BaseVariableWidthVector> {
 
     @Override
     public int compareNotNull(int index1, int index2) {
-      vector1.get(index1, holder1);
-      vector2.get(index2, holder2);
+      int start1 = vector1.getOffsetBuffer().getInt(index1 * OFFSET_WIDTH);
+      int start2 = vector2.getOffsetBuffer().getInt(index2 * OFFSET_WIDTH);
+
+      int end1 = vector1.getOffsetBuffer().getInt((index1 + 1) * OFFSET_WIDTH);
+      int end2 = vector2.getOffsetBuffer().getInt((index2 + 1) * OFFSET_WIDTH);
 
-      int length1 = holder1.end - holder1.start;
-      int length2 = holder2.end - holder2.start;
+      int length1 = end1 - start1;
+      int length2 = end2 - start2;
 
       int minLength = length1 < length2 ? length1 : length2;
       for (int i = 0; i < minLength; i++) {
-        byte b1 = holder1.buffer.getByte(holder1.start + i);
-        byte b2 = holder2.buffer.getByte(holder2.start + i);
+        byte b1 = vector1.getDataBuffer().getByte(start1 + i);
+        byte b2 = vector2.getDataBuffer().getByte(start2 + i);
 
         if (b1 != b2) {
           return b1 - b2;
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java
index f5c2912..02e2b20 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java
@@ -23,6 +23,7 @@ import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
 import org.apache.arrow.algorithm.sort.VectorValueComparator;
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BaseVariableWidthVector;
 import org.apache.arrow.vector.IntVector;
 import org.apache.arrow.vector.VarCharVector;
 import org.junit.After;
@@ -68,7 +69,8 @@ public class TestVectorSearcher {
       negVector.set(0, -333);
 
       // do search
-      VectorValueComparator<IntVector> comparator = new DefaultVectorComparators.IntComparator();
+      VectorValueComparator<IntVector> comparator =
+              DefaultVectorComparators.createDefaultComparator(rawVector);
       for (int i = 0; i < VECTOR_LENGTH; i++) {
         int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i);
         assertEquals(i, result);
@@ -99,7 +101,8 @@ public class TestVectorSearcher {
       negVector.set(0, -333);
 
       // do search
-      VectorValueComparator<IntVector> comparator = new DefaultVectorComparators.IntComparator();
+      VectorValueComparator<IntVector> comparator =
+              DefaultVectorComparators.createDefaultComparator(rawVector);
       for (int i = 0; i < VECTOR_LENGTH; i++) {
         int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i);
         assertEquals(i, result);
@@ -137,7 +140,8 @@ public class TestVectorSearcher {
       negVector.set(0, "abcd".getBytes());
 
       // do search
-      VectorValueComparator<VarCharVector> comparator = new DefaultVectorComparators.VarCharComparator();
+      VectorValueComparator<BaseVariableWidthVector> comparator =
+              DefaultVectorComparators.createDefaultComparator(rawVector);
       for (int i = 0; i < VECTOR_LENGTH; i++) {
         int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i);
         assertEquals(i, result);
@@ -175,7 +179,8 @@ public class TestVectorSearcher {
       negVector.set(0, "abcd".getBytes());
 
       // do search
-      VectorValueComparator<VarCharVector> comparator = new DefaultVectorComparators.VarCharComparator();
+      VectorValueComparator<BaseVariableWidthVector> comparator =
+              DefaultVectorComparators.createDefaultComparator(rawVector);
       for (int i = 0; i < VECTOR_LENGTH; i++) {
         int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i);
         assertEquals(i, result);
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java
index 1a71a53..ecbf9fa 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java
@@ -64,7 +64,7 @@ public class TestFixedWidthInPlaceVectorSorter {
 
       // sort the vector
       FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter();
-      DefaultVectorComparators.IntComparator comparator = new DefaultVectorComparators.IntComparator();
+      VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
 
       sorter.sortInPlace(vec, comparator);
 
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java
index 4fc4a7a..1dfe946 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java
@@ -70,7 +70,7 @@ public class TestFixedWidthOutOfPlaceVectorSorter {
 
       // sort the vector
       FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
-      DefaultVectorComparators.ByteComparator comparator = new DefaultVectorComparators.ByteComparator();
+      VectorValueComparator<TinyIntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
 
       TinyIntVector sortedVec =
               (TinyIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
@@ -117,7 +117,7 @@ public class TestFixedWidthOutOfPlaceVectorSorter {
 
       // sort the vector
       FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
-      DefaultVectorComparators.ShortComparator comparator = new DefaultVectorComparators.ShortComparator();
+      VectorValueComparator<SmallIntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
 
       SmallIntVector sortedVec =
               (SmallIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
@@ -164,7 +164,7 @@ public class TestFixedWidthOutOfPlaceVectorSorter {
 
       // sort the vector
       FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
-      DefaultVectorComparators.IntComparator comparator = new DefaultVectorComparators.IntComparator();
+      VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
 
       IntVector sortedVec = (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
       sortedVec.allocateNew(vec.getValueCount());
@@ -210,7 +210,7 @@ public class TestFixedWidthOutOfPlaceVectorSorter {
 
       // sort the vector
       FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
-      DefaultVectorComparators.LongComparator comparator = new DefaultVectorComparators.LongComparator();
+      VectorValueComparator<BigIntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
 
       BigIntVector sortedVec = (BigIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
       sortedVec.allocateNew(vec.getValueCount());
@@ -256,7 +256,7 @@ public class TestFixedWidthOutOfPlaceVectorSorter {
 
       // sort the vector
       FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
-      DefaultVectorComparators.Float4Comparator comparator = new DefaultVectorComparators.Float4Comparator();
+      VectorValueComparator<Float4Vector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
 
       Float4Vector sortedVec = (Float4Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
       sortedVec.allocateNew(vec.getValueCount());
@@ -302,7 +302,7 @@ public class TestFixedWidthOutOfPlaceVectorSorter {
 
       // sort the vector
       FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
-      DefaultVectorComparators.Float8Comparator comparator = new DefaultVectorComparators.Float8Comparator();
+      VectorValueComparator<Float8Vector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
 
       Float8Vector sortedVec = (Float8Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
       sortedVec.allocateNew(vec.getValueCount());
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java
index 68be254..46b3060 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java
@@ -22,6 +22,7 @@ import static org.junit.Assert.assertTrue;
 
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BaseVariableWidthVector;
 import org.apache.arrow.vector.VarCharVector;
 import org.junit.After;
 import org.junit.Assert;
@@ -65,7 +66,8 @@ public class TestVariableWidthOutOfPlaceVectorSorter {
 
       // sort the vector
       VariableWidthOutOfPlaceVectorSorter sorter = new VariableWidthOutOfPlaceVectorSorter();
-      DefaultVectorComparators.VarCharComparator comparator = new DefaultVectorComparators.VarCharComparator();
+      VectorValueComparator<BaseVariableWidthVector> comparator =
+              DefaultVectorComparators.createDefaultComparator(vec);
 
       VarCharVector sortedVec =
               (VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);