You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2021/12/03 14:31:45 UTC
[arrow] branch master updated: ARROW-13811: [Java] Provide a general out-of-place sorter
This is an automated email from the ASF dual-hosted git repository.
liyafan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 6cd288e ARROW-13811: [Java] Provide a general out-of-place sorter
6cd288e is described below
commit 6cd288e686abcdf2957582d6652ddae3d031d0fa
Author: liyafan82 <fa...@foxmail.com>
AuthorDate: Fri Dec 3 22:29:23 2021 +0800
ARROW-13811: [Java] Provide a general out-of-place sorter
The sorter should work for any type of vectors, with a time complexity of `O(n*log( n ))`.
Since it does not make any assumptions about the memory layout of the vector, its performance can be sub-optimal. So if another sorter is applicable (e.g. `FixedWidthInPlaceVectorSorter`), it should be used in preference.
Closes #11035 from liyafan82/fly_0831_st
Authored-by: liyafan82 <fa...@foxmail.com>
Signed-off-by: liyafan82 <fa...@foxmail.com>
---
.../sort/GeneralOutOfPlaceVectorSorter.java | 59 +++++++++
.../sort/TestFixedWidthOutOfPlaceVectorSorter.java | 25 ++--
.../sort/TestGeneralOutOfPlaceVectorSorter.java | 142 +++++++++++++++++++++
.../algorithm/sort/TestOutOfPlaceVectorSorter.java | 46 +++++++
.../TestVariableWidthOutOfPlaceVectorSorter.java | 13 +-
5 files changed, 275 insertions(+), 10 deletions(-)
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java
new file mode 100644
index 0000000..9ea39f6
--- /dev/null
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * An out-of-place sorter for vectors of arbitrary type, with time complexity O(n*log(n)).
+ * Since it does not make any assumptions about the memory layout of the vector, its performance
+ * can be sub-optimal. So if another sorter is applicable ({@link FixedWidthInPlaceVectorSorter}),
+ * it should be used in preference.
+ *
+ * @param <V> vector type.
+ */
+public class GeneralOutOfPlaceVectorSorter<V extends ValueVector> implements OutOfPlaceVectorSorter<V> {
+
+ @Override
+ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator<V> comparator) {
+ comparator.attachVector(srcVector);
+
+ // check vector capacity
+ Preconditions.checkArgument(dstVector.getValueCapacity() >= srcVector.getValueCount(),
+ "Not enough capacity for the target vector. " +
+ "Expected capacity %s, actual capacity %s", srcVector.getValueCount(), dstVector.getValueCapacity());
+
+ // sort value indices
+ try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) {
+ sortedIndices.allocateNew(srcVector.getValueCount());
+ sortedIndices.setValueCount(srcVector.getValueCount());
+
+ IndexSorter<V> indexSorter = new IndexSorter<>();
+ indexSorter.sort(srcVector, sortedIndices, comparator);
+
+ // copy sorted values to the output vector
+ for (int dstIndex = 0; dstIndex < sortedIndices.getValueCount(); dstIndex++) {
+ int srcIndex = sortedIndices.get(dstIndex);
+
+ dstVector.copyFromSafe(srcIndex, dstIndex, srcVector);
+ }
+ }
+ }
+}
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java
index e3701f1..cc13e7f 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java
@@ -24,6 +24,7 @@ import java.util.stream.IntStream;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BaseFixedWidthVector;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.Float4Vector;
import org.apache.arrow.vector.Float8Vector;
@@ -39,10 +40,18 @@ import org.junit.Test;
/**
* Test cases for {@link FixedWidthOutOfPlaceVectorSorter}.
*/
-public class TestFixedWidthOutOfPlaceVectorSorter {
+public class TestFixedWidthOutOfPlaceVectorSorter extends TestOutOfPlaceVectorSorter {
private BufferAllocator allocator;
+ public TestFixedWidthOutOfPlaceVectorSorter(boolean generalSorter) {
+ super(generalSorter);
+ }
+
+ <V extends BaseFixedWidthVector> OutOfPlaceVectorSorter<V> getSorter() {
+ return generalSorter ? new GeneralOutOfPlaceVectorSorter<>() : new FixedWidthOutOfPlaceVectorSorter<>();
+ }
+
@Before
public void prepare() {
allocator = new RootAllocator(1024 * 1024);
@@ -72,7 +81,7 @@ public class TestFixedWidthOutOfPlaceVectorSorter {
vec.set(9, 2);
// sort the vector
- FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
+ OutOfPlaceVectorSorter<TinyIntVector> sorter = getSorter();
VectorValueComparator<TinyIntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
TinyIntVector sortedVec =
@@ -119,7 +128,7 @@ public class TestFixedWidthOutOfPlaceVectorSorter {
vec.set(9, 2);
// sort the vector
- FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
+ OutOfPlaceVectorSorter<SmallIntVector> sorter = getSorter();
VectorValueComparator<SmallIntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
SmallIntVector sortedVec =
@@ -166,7 +175,7 @@ public class TestFixedWidthOutOfPlaceVectorSorter {
vec.set(9, 2);
// sort the vector
- FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
+ OutOfPlaceVectorSorter<IntVector> sorter = getSorter();
VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
IntVector sortedVec = (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
@@ -212,7 +221,7 @@ public class TestFixedWidthOutOfPlaceVectorSorter {
vec.set(9, 2L);
// sort the vector
- FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
+ OutOfPlaceVectorSorter<BigIntVector> sorter = getSorter();
VectorValueComparator<BigIntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
BigIntVector sortedVec = (BigIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
@@ -258,7 +267,7 @@ public class TestFixedWidthOutOfPlaceVectorSorter {
vec.set(9, 2f);
// sort the vector
- FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
+ OutOfPlaceVectorSorter<Float4Vector> sorter = getSorter();
VectorValueComparator<Float4Vector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
Float4Vector sortedVec = (Float4Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
@@ -304,7 +313,7 @@ public class TestFixedWidthOutOfPlaceVectorSorter {
vec.set(9, 2);
// sort the vector
- FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
+ OutOfPlaceVectorSorter<Float8Vector> sorter = getSorter();
VectorValueComparator<Float8Vector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
Float8Vector sortedVec = (Float8Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
@@ -341,7 +350,7 @@ public class TestFixedWidthOutOfPlaceVectorSorter {
66, 67, 68, 69, 70, 71);
// sort the vector
- FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
+ OutOfPlaceVectorSorter<IntVector> sorter = getSorter();
VectorValueComparator<IntVector> comparator = DefaultVectorComparators.createDefaultComparator(vec);
try (IntVector sortedVec = (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null)) {
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestGeneralOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestGeneralOutOfPlaceVectorSorter.java
new file mode 100644
index 0000000..07a6b54
--- /dev/null
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestGeneralOutOfPlaceVectorSorter.java
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link GeneralOutOfPlaceVectorSorter}.
+ */
+public class TestGeneralOutOfPlaceVectorSorter {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ VectorValueComparator<StructVector> getComparator(StructVector structVector) {
+ IntVector child0 = structVector.getChild("column0", IntVector.class);
+ VectorValueComparator<IntVector> childComp0 = DefaultVectorComparators.createDefaultComparator(child0);
+ childComp0.attachVector(child0);
+
+ IntVector child1 = structVector.getChild("column1", IntVector.class);
+ VectorValueComparator<IntVector> childComp1 = DefaultVectorComparators.createDefaultComparator(child1);
+ childComp1.attachVector(child1);
+
+ VectorValueComparator<StructVector> comp = new VectorValueComparator<StructVector>() {
+
+ @Override
+ public int compareNotNull(int index1, int index2) {
+ // compare values by lexicographic order
+ int result0 = childComp0.compare(index1, index2);
+ if (result0 != 0) {
+ return result0;
+ }
+ return childComp1.compare(index1, index2);
+ }
+
+ @Override
+ public VectorValueComparator createNew() {
+ return this;
+ }
+ };
+
+ return comp;
+ }
+
+ @Test
+ public void testSortStructVector() {
+ final int vectorLength = 7;
+ try (StructVector srcVector = StructVector.empty("src struct", allocator);
+ StructVector dstVector = StructVector.empty("dst struct", allocator)) {
+
+ IntVector srcChild0 =
+ srcVector.addOrGet("column0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ IntVector srcChild1 =
+ srcVector.addOrGet("column1", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+
+ IntVector dstChild0 =
+ dstVector.addOrGet("column0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ IntVector dstChild1 =
+ dstVector.addOrGet("column1", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+
+ // src struct vector values:
+ // [
+ // (2, 1)
+ // (3, 4)
+ // (5, 4)
+ // (null, 3)
+ // (7, null)
+ // (null, null)
+ // (6, 6)
+ // ]
+
+ ValueVectorDataPopulator.setVector(srcChild0, 2, 3, 5, null, 7, null, 6);
+ ValueVectorDataPopulator.setVector(srcChild1, 1, 4, 4, 3, null, null, 6);
+ srcVector.setIndexDefined(0);
+ srcVector.setIndexDefined(1);
+ srcVector.setIndexDefined(2);
+ srcVector.setIndexDefined(3);
+ srcVector.setIndexDefined(4);
+ srcVector.setIndexDefined(6);
+ srcVector.setValueCount(vectorLength);
+
+ dstChild0.allocateNew(vectorLength);
+ dstChild1.allocateNew(vectorLength);
+ dstVector.setValueCount(vectorLength);
+
+ // construct the comparator
+ VectorValueComparator<StructVector> comp = getComparator(srcVector);
+
+ // sort the vector
+ GeneralOutOfPlaceVectorSorter<StructVector> sorter = new GeneralOutOfPlaceVectorSorter<>();
+ sorter.sortOutOfPlace(srcVector, dstVector, comp);
+
+ // validate results
+ assertEquals(vectorLength, dstVector.getValueCount());
+ assertEquals(
+ "[" +
+ "null, " +
+ "{\"column1\":3}, " +
+ "{\"column0\":2,\"column1\":1}, " +
+ "{\"column0\":3,\"column1\":4}, " +
+ "{\"column0\":5,\"column1\":4}, " +
+ "{\"column0\":6,\"column1\":6}, " +
+ "{\"column0\":7}" +
+ "]", dstVector.toString());
+ }
+ }
+}
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOutOfPlaceVectorSorter.java
new file mode 100644
index 0000000..66b75cb
--- /dev/null
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOutOfPlaceVectorSorter.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+/**
+ * Test cases for out-of-place sorters.
+ */
+@RunWith(Parameterized.class)
+public abstract class TestOutOfPlaceVectorSorter {
+
+ protected final boolean generalSorter;
+
+ public TestOutOfPlaceVectorSorter(boolean generalSorter) {
+ this.generalSorter = generalSorter;
+ }
+
+ @Parameterized.Parameters(name = "general sorter = {0}")
+ public static Collection<Object[]> getParameter() {
+ List<Object[]> params = new ArrayList<>();
+ params.add(new Object[] {true});
+ params.add(new Object[] {false});
+ return params;
+ }
+}
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java
index 46b3060..8f4e3b8 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java
@@ -32,10 +32,19 @@ import org.junit.Test;
/**
* Test cases for {@link VariableWidthOutOfPlaceVectorSorter}.
*/
-public class TestVariableWidthOutOfPlaceVectorSorter {
+public class TestVariableWidthOutOfPlaceVectorSorter extends TestOutOfPlaceVectorSorter {
private BufferAllocator allocator;
+ public TestVariableWidthOutOfPlaceVectorSorter(boolean generalSorter) {
+ super(generalSorter);
+ }
+
+ <V extends BaseVariableWidthVector> OutOfPlaceVectorSorter<V> getSorter() {
+ return generalSorter ? new GeneralOutOfPlaceVectorSorter<>() : new VariableWidthOutOfPlaceVectorSorter<V>();
+ }
+
+
@Before
public void prepare() {
allocator = new RootAllocator(1024 * 1024);
@@ -65,7 +74,7 @@ public class TestVariableWidthOutOfPlaceVectorSorter {
vec.set(9, "yes".getBytes());
// sort the vector
- VariableWidthOutOfPlaceVectorSorter sorter = new VariableWidthOutOfPlaceVectorSorter();
+ OutOfPlaceVectorSorter<BaseVariableWidthVector> sorter = getSorter();
VectorValueComparator<BaseVariableWidthVector> comparator =
DefaultVectorComparators.createDefaultComparator(vec);