You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by em...@apache.org on 2019/07/20 05:31:59 UTC
[arrow] branch master updated: ARROW-5920: [Java] Support sort &
compare for all variable width vectors
This is an automated email from the ASF dual-hosted git repository.
emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 4a3822d ARROW-5920: [Java] Support sort & compare for all variable width vectors
4a3822d is described below
commit 4a3822d2fe20e1ac613b82518f2fb6dd758dab01
Author: liyafan82 <fa...@foxmail.com>
AuthorDate: Fri Jul 19 22:31:23 2019 -0700
ARROW-5920: [Java] Support sort & compare for all variable width vectors
All types of variable-width vectors can reuse the same comparator for sorting & searching.
Author: liyafan82 <fa...@foxmail.com>
Closes #4860 from liyafan82/fly_0712_varsort and squashes the following commits:
cbd8c3fbb <liyafan82> Provide a utility to create the default comparator
46d2c1171 <liyafan82> Support sort & compare for all variable width vectors
---
.../algorithm/sort/DefaultVectorComparators.java | 57 +++++++++++++++++-----
.../arrow/algorithm/search/TestVectorSearcher.java | 13 +++--
.../sort/TestFixedWidthInPlaceVectorSorter.java | 2 +-
.../sort/TestFixedWidthOutOfPlaceVectorSorter.java | 12 ++---
.../TestVariableWidthOutOfPlaceVectorSorter.java | 4 +-
5 files changed, 63 insertions(+), 25 deletions(-)
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
index 2dfa0aa..a2d2f78 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
@@ -17,14 +17,17 @@
package org.apache.arrow.algorithm.sort;
+import static org.apache.arrow.vector.BaseVariableWidthVector.OFFSET_WIDTH;
+
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.Float4Vector;
import org.apache.arrow.vector.Float8Vector;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.SmallIntVector;
import org.apache.arrow.vector.TinyIntVector;
-import org.apache.arrow.vector.VarCharVector;
-import org.apache.arrow.vector.holders.NullableVarCharHolder;
+import org.apache.arrow.vector.ValueVector;
/**
* Default comparator implementations for different types of vectors.
@@ -32,6 +35,34 @@ import org.apache.arrow.vector.holders.NullableVarCharHolder;
public class DefaultVectorComparators {
/**
+ * Create the default comparator for the vector.
+ * @param vector the vector.
+ * @param <T> the vector type.
+ * @return the default comparator.
+ */
+ public static <T extends ValueVector> VectorValueComparator<T> createDefaultComparator(T vector) {
+ if (vector instanceof BaseFixedWidthVector) {
+ if (vector instanceof TinyIntVector) {
+ return (VectorValueComparator<T>) new ByteComparator();
+ } else if (vector instanceof SmallIntVector) {
+ return (VectorValueComparator<T>) new ShortComparator();
+ } else if (vector instanceof IntVector) {
+ return (VectorValueComparator<T>) new IntComparator();
+ } else if (vector instanceof BigIntVector) {
+ return (VectorValueComparator<T>) new LongComparator();
+ } else if (vector instanceof Float4Vector) {
+ return (VectorValueComparator<T>) new Float4Comparator();
+ } else if (vector instanceof Float8Vector) {
+ return (VectorValueComparator<T>) new Float8Comparator();
+ }
+ } else if (vector instanceof BaseVariableWidthVector) {
+ return (VectorValueComparator<T>) new VariableWidthComparator();
+ }
+
+ throw new IllegalArgumentException("No default comparator for " + vector.getClass().getCanonicalName());
+ }
+
+ /**
* Default comparator for bytes.
* The comparison is based on values, with null comes first.
*/
@@ -169,26 +200,26 @@ public class DefaultVectorComparators {
}
/**
- * Default comparator for varchars.
+ * Default comparator for {@link org.apache.arrow.vector.BaseVariableWidthVector}.
* The comparison is in lexicographic order, with null comes first.
*/
- public static class VarCharComparator extends VectorValueComparator<VarCharVector> {
-
- private NullableVarCharHolder holder1 = new NullableVarCharHolder();
- private NullableVarCharHolder holder2 = new NullableVarCharHolder();
+ public static class VariableWidthComparator extends VectorValueComparator<BaseVariableWidthVector> {
@Override
public int compareNotNull(int index1, int index2) {
- vector1.get(index1, holder1);
- vector2.get(index2, holder2);
+ int start1 = vector1.getOffsetBuffer().getInt(index1 * OFFSET_WIDTH);
+ int start2 = vector2.getOffsetBuffer().getInt(index2 * OFFSET_WIDTH);
+
+ int end1 = vector1.getOffsetBuffer().getInt((index1 + 1) * OFFSET_WIDTH);
+ int end2 = vector2.getOffsetBuffer().getInt((index2 + 1) * OFFSET_WIDTH);
- int length1 = holder1.end - holder1.start;
- int length2 = holder2.end - holder2.start;
+ int length1 = end1 - start1;
+ int length2 = end2 - start2;
int minLength = length1 < length2 ? length1 : length2;
for (int i = 0; i < minLength; i++) {
- byte b1 = holder1.buffer.getByte(holder1.start + i);
- byte b2 = holder2.buffer.getByte(holder2.start + i);
+ byte b1 = vector1.getDataBuffer().getByte(start1 + i);
+ byte b2 = vector2.getDataBuffer().getByte(start2 + i);
if (b1 != b2) {
return b1 - b2;
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java
index f5c2912..02e2b20 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java
@@ -23,6 +23,7 @@ import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BaseVariableWidthVector;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.VarCharVector;
import org.junit.After;
@@ -68,7 +69,8 @@ public class TestVectorSearcher {
negVector.set(0, -333);
// do search
- VectorValueComparator<IntVector> comparator = new DefaultVectorComparators.IntComparator();
+ VectorValueComparator<IntVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(rawVector);
for (int i = 0; i < VECTOR_LENGTH; i++) {
int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i);
assertEquals(i, result);
@@ -99,7 +101,8 @@ public class TestVectorSearcher {
negVector.set(0, -333);
// do search
- VectorValueComparator<IntVector> comparator = new DefaultVectorComparators.IntComparator();
+ VectorValueComparator<IntVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(rawVector);
for (int i = 0; i < VECTOR_LENGTH; i++) {
int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i);
assertEquals(i, result);
@@ -137,7 +140,8 @@ public class TestVectorSearcher {
negVector.set(0, "abcd".getBytes());
// do search
- VectorValueComparator<VarCharVector> comparator = new DefaultVectorComparators.VarCharComparator();
+ VectorValueComparator<BaseVariableWidthVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(rawVector);
for (int i = 0; i < VECTOR_LENGTH; i++) {
int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i);
assertEquals(i, result);
@@ -175,7 +179,8 @@ public class TestVectorSearcher {
negVector.set(0, "abcd".getBytes());
// do search
- VectorValueComparator<VarCharVector> comparator = new DefaultVectorComparators.VarCharComparator();
+ VectorValueComparator<BaseVariableWidthVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(rawVector);
for (int i = 0; i < VECTOR_LENGTH; i++) {
int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i);
assertEquals(i, result);
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java
index 1a71a53..ecbf9fa 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java
@@ -64,7 +64,7 @@ public class TestFixedWidthInPlaceVectorSorter {
// sort the vector
FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter();
- DefaultVectorComparators.IntComparator comparator = new DefaultVectorComparators.IntComparator();
+ VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
sorter.sortInPlace(vec, comparator);
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java
index 4fc4a7a..1dfe946 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java
@@ -70,7 +70,7 @@ public class TestFixedWidthOutOfPlaceVectorSorter {
// sort the vector
FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
- DefaultVectorComparators.ByteComparator comparator = new DefaultVectorComparators.ByteComparator();
+ VectorValueComparator<TinyIntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
TinyIntVector sortedVec =
(TinyIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
@@ -117,7 +117,7 @@ public class TestFixedWidthOutOfPlaceVectorSorter {
// sort the vector
FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
- DefaultVectorComparators.ShortComparator comparator = new DefaultVectorComparators.ShortComparator();
+ VectorValueComparator<SmallIntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
SmallIntVector sortedVec =
(SmallIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
@@ -164,7 +164,7 @@ public class TestFixedWidthOutOfPlaceVectorSorter {
// sort the vector
FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
- DefaultVectorComparators.IntComparator comparator = new DefaultVectorComparators.IntComparator();
+ VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
IntVector sortedVec = (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
sortedVec.allocateNew(vec.getValueCount());
@@ -210,7 +210,7 @@ public class TestFixedWidthOutOfPlaceVectorSorter {
// sort the vector
FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
- DefaultVectorComparators.LongComparator comparator = new DefaultVectorComparators.LongComparator();
+ VectorValueComparator<BigIntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
BigIntVector sortedVec = (BigIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
sortedVec.allocateNew(vec.getValueCount());
@@ -256,7 +256,7 @@ public class TestFixedWidthOutOfPlaceVectorSorter {
// sort the vector
FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
- DefaultVectorComparators.Float4Comparator comparator = new DefaultVectorComparators.Float4Comparator();
+ VectorValueComparator<Float4Vector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
Float4Vector sortedVec = (Float4Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
sortedVec.allocateNew(vec.getValueCount());
@@ -302,7 +302,7 @@ public class TestFixedWidthOutOfPlaceVectorSorter {
// sort the vector
FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
- DefaultVectorComparators.Float8Comparator comparator = new DefaultVectorComparators.Float8Comparator();
+ VectorValueComparator<Float8Vector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
Float8Vector sortedVec = (Float8Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
sortedVec.allocateNew(vec.getValueCount());
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java
index 68be254..46b3060 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java
@@ -22,6 +22,7 @@ import static org.junit.Assert.assertTrue;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BaseVariableWidthVector;
import org.apache.arrow.vector.VarCharVector;
import org.junit.After;
import org.junit.Assert;
@@ -65,7 +66,8 @@ public class TestVariableWidthOutOfPlaceVectorSorter {
// sort the vector
VariableWidthOutOfPlaceVectorSorter sorter = new VariableWidthOutOfPlaceVectorSorter();
- DefaultVectorComparators.VarCharComparator comparator = new DefaultVectorComparators.VarCharComparator();
+ VectorValueComparator<BaseVariableWidthVector> comparator =
+ DefaultVectorComparators.createDefaultComparator(vec);
VarCharVector sortedVec =
(VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);