You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by em...@apache.org on 2019/06/24 14:07:05 UTC

[arrow] branch master updated: ARROW-5581: [Java] Provide interfaces and initial implementations for vector sorting

This is an automated email from the ASF dual-hosted git repository.

emkornfield pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new da3a42b  ARROW-5581: [Java] Provide interfaces and initial implementations for vector sorting
da3a42b is described below

commit da3a42b0f867bc027ac79d510d8bbe787fb0a5f9
Author: liyafan82 <fa...@foxmail.com>
AuthorDate: Mon Jun 24 07:06:07 2019 -0700

    ARROW-5581: [Java] Provide interfaces and initial implementations for vector sorting
    
    Data sorting is an important and common feature. For this issue, we provide the basic interfaces for vector sorting. Users can implement customized sorting algorithms by extending our interfaces. In addition, we also give initial sorting implementations for both fixed-width and variable-width vectors.
    
    Author: liyafan82 <fa...@foxmail.com>
    
    Closes #4542 from liyafan82/fly_0613_sort and squashes the following commits:
    
    ce1098404 <liyafan82>  Surround sorted index in try resouce block
    8af028250 <liyafan82>  Resolve the path problem and split test cases
    4fe90ba6b <liyafan82>  Remove the common super interface for in-place & out-of-place sort
    83ef6e1a7 <liyafan82>  Hold pivot buffer with an integer
    5674d389e <liyafan82>  Adjust interfaces
    4a6b0866b <liyafan82>  Resolve style problems
    0fddb9fde <liyafan82> Merge branch 'master' into fly_0613_sort
    1f4187560 <liyafan82>  Move the API for creating output vectors and make index sorter base on IntVector
    f38b60cd6 <liyafan82>  Resolve comments
    6f9e8ec9b <liyafan82>  Change the constructor of the default sorter
    0d66fd760 <liyafan82>  Change the sorting algorithm
    f675c78a2 <liyafan82>  Provide interfaces and initial implementations for vector sorting
---
 java/algorithm/pom.xml                             |  41 +++++++
 .../algorithm/sort/DefaultVectorComparators.java   |  80 ++++++++++++++
 .../sort/FixedWidthOutOfPlaceVectorSorter.java     |  68 ++++++++++++
 .../apache/arrow/algorithm/sort/IndexSorter.java   |  87 +++++++++++++++
 .../algorithm/sort/OutOfPlaceVectorSorter.java     |  37 +++++++
 .../sort/VariableWidthOutOfPlaceVectorSorter.java  |  76 +++++++++++++
 .../algorithm/sort/VectorValueComparator.java      | 118 +++++++++++++++++++++
 .../sort/TestFixedWidthOutOfPlaceVectorSorter.java |  93 ++++++++++++++++
 .../arrow/algorithm/sort/TestIndexSorter.java      |  83 +++++++++++++++
 .../TestVariableWidthOutOfPlaceVectorSorter.java   |  97 +++++++++++++++++
 java/pom.xml                                       |   1 +
 11 files changed, 781 insertions(+)

diff --git a/java/algorithm/pom.xml b/java/algorithm/pom.xml
new file mode 100644
index 0000000..5a6108b
--- /dev/null
+++ b/java/algorithm/pom.xml
@@ -0,0 +1,41 @@
+<?xml version="1.0"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+  license agreements. See the NOTICE file distributed with this work for additional
+  information regarding copyright ownership. The ASF licenses this file to
+  You under the Apache License, Version 2.0 (the "License"); you may not use
+  this file except in compliance with the License. You may obtain a copy of
+  the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+  by applicable law or agreed to in writing, software distributed under the
+  License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+  OF ANY KIND, either express or implied. See the License for the specific
+  language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.arrow</groupId>
+    <artifactId>arrow-java-root</artifactId>
+    <version>0.14.0-SNAPSHOT</version>
+  </parent>
+  <artifactId>arrow-algorithm</artifactId>
+  <name>Arrow Algorithms</name>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.arrow</groupId>
+      <artifactId>arrow-vector</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.arrow</groupId>
+      <artifactId>arrow-memory</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>io.netty</groupId>
+      <artifactId>netty-common</artifactId>
+    </dependency>
+  </dependencies>
+
+  <build>
+  </build>
+</project>
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
new file mode 100644
index 0000000..e16b9ec
--- /dev/null
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.holders.NullableVarCharHolder;
+
+/**
+ * Default comparator implementations for different types of vectors.
+ */
+public class DefaultVectorComparators {
+
+  /**
+   * Default comparator for 32-bit integers.
+   * The comparison is based on int values, with null comes first.
+   */
+  public static class IntComparator extends VectorValueComparator<IntVector> {
+
+    public IntComparator() {
+      super(Integer.SIZE / 8);
+    }
+
+    @Override
+    public int compareNotNull(int index1, int index2) {
+      int value1 = vector1.get(index1);
+      int value2 = vector2.get(index2);
+      return value1 - value2;
+    }
+  }
+
+  /**
+   * Default comparator for varchars.
+   * The comparison is in lexicographic order, with null comes first.
+   */
+  public static class VarCharComparator extends VectorValueComparator<VarCharVector> {
+
+    private NullableVarCharHolder holder1 = new NullableVarCharHolder();
+    private NullableVarCharHolder holder2 = new NullableVarCharHolder();
+
+    @Override
+    public int compareNotNull(int index1, int index2) {
+      vector1.get(index1, holder1);
+      vector2.get(index2, holder2);
+
+      int length1 = holder1.end - holder1.start;
+      int length2 = holder2.end - holder2.start;
+
+      int minLength = length1 < length2 ? length1 : length2;
+      for (int i = 0; i < minLength; i++) {
+        byte b1 = holder1.buffer.getByte(holder1.start + i);
+        byte b2 = holder2.buffer.getByte(holder2.start + i);
+
+        if (b1 != b2) {
+          return b1 - b2;
+        }
+      }
+
+      return length1 - length2;
+    }
+  }
+
+  private DefaultVectorComparators() {
+  }
+}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java
new file mode 100644
index 0000000..a02695a
--- /dev/null
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.IntVector;
+
+import io.netty.buffer.ArrowBuf;
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * Default out-of-place sorter for fixed-width vectors.
+ * It is an out-of-place sort, with time complexity O(n*log(n)).
+ * @param <V> vector type.
+ */
+public class FixedWidthOutOfPlaceVectorSorter<V extends BaseFixedWidthVector> implements OutOfPlaceVectorSorter<V> {
+
+  protected IndexSorter<V> indexSorter = new IndexSorter<>();
+
+  @Override
+  public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator<V> comparator) {
+    comparator.attachVector(srcVector);
+
+    int valueWidth = comparator.getValueWidth();
+
+    // buffers referenced in the sort
+    ArrowBuf srcValueBuffer = srcVector.getDataBuffer();
+    ArrowBuf dstValidityBuffer = dstVector.getValidityBuffer();
+    ArrowBuf dstValueBuffer = dstVector.getDataBuffer();
+
+    // sort value indices
+    try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) {
+      sortedIndices.allocateNew(srcVector.getValueCount());
+      sortedIndices.setValueCount(srcVector.getValueCount());
+      indexSorter.sort(srcVector, sortedIndices, comparator);
+
+      // copy sorted values to the output vector
+      for (int dstIndex = 0; dstIndex < sortedIndices.getValueCount(); dstIndex++) {
+        int srcIndex = sortedIndices.get(dstIndex);
+        if (srcVector.isNull(srcIndex)) {
+          BitVectorHelper.setValidityBit(dstValidityBuffer, dstIndex, 0);
+        } else {
+          BitVectorHelper.setValidityBit(dstValidityBuffer, dstIndex, 1);
+          PlatformDependent.copyMemory(
+                  srcValueBuffer.memoryAddress() + srcIndex * valueWidth,
+                  dstValueBuffer.memoryAddress() + dstIndex * valueWidth,
+                  valueWidth);
+        }
+      }
+    }
+  }
+}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java
new file mode 100644
index 0000000..d85eb6f
--- /dev/null
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import java.util.stream.IntStream;
+
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Sorter for the indices of a vector.
+ * @param <V> vector type.
+ */
+public class IndexSorter<V extends ValueVector> {
+
+  /**
+   * Comparator for vector indices.
+   */
+  private VectorValueComparator<V> comparator;
+
+  /**
+   * Vector indices to sort.
+   */
+  private IntVector indices;
+
+  /**
+   * Sorts indices, by quick-sort. Suppose the vector is denoted by v.
+   * After calling this method, the following relations hold:
+   * v(indices[0]) <= v(indices[1]) <= ...
+   * @param vector the vector whose indices need to be sorted.
+   * @param indices the vector for storing the sorted indices.
+   * @param comparator the comparator to sort indices.
+   */
+  public void sort(V vector, IntVector indices, VectorValueComparator<V> comparator) {
+    comparator.attachVector(vector);
+
+    this.indices = indices;
+
+    IntStream.range(0, vector.getValueCount()).forEach(i -> indices.set(i, i));
+
+    this.comparator = comparator;
+
+    quickSort(0, indices.getValueCount() - 1);
+  }
+
+  private void quickSort(int low, int high) {
+    if (low < high) {
+      int mid = partition(low, high);
+      quickSort(low, mid - 1);
+      quickSort(mid + 1, high);
+    }
+  }
+
+  private int partition(int low, int high) {
+    int pivotIndex = indices.get(low);
+
+    while (low < high) {
+      while (low < high && comparator.compare(indices.get(high), pivotIndex) >= 0) {
+        high -= 1;
+      }
+      indices.set(low, indices.get(high));
+
+      while (low < high && comparator.compare(indices.get(low), pivotIndex) <= 0) {
+        low += 1;
+      }
+      indices.set(high, indices.get(low));
+    }
+
+    indices.set(low, pivotIndex);
+    return low;
+  }
+}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java
new file mode 100644
index 0000000..41d6dad
--- /dev/null
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Basic interface for sorting a vector out-of-place.
+ * That is, the sorting is performed on a newly-created vector,
+ * and the original vector is not modified.
+ * @param <V> the vector type.
+ */
+public interface OutOfPlaceVectorSorter<V extends ValueVector> {
+
+  /**
+   * Sort a vector out-of-place.
+   * @param inVec the input vector.
+   * @param outVec the output vector, which has the same size as the input vector.
+   * @param comparator the criteria for sort.
+   */
+  void sortOutOfPlace(V inVec, V outVec, VectorValueComparator<V> comparator);
+}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java
new file mode 100644
index 0000000..c6dbc42
--- /dev/null
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.IntVector;
+
+import io.netty.buffer.ArrowBuf;
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * Default sorter for variable-width vectors.
+ * It is an out-of-place sort, with time complexity O(n*log(n)).
+ * @param <V> vector type.
+ */
+public class VariableWidthOutOfPlaceVectorSorter<V extends BaseVariableWidthVector>
+        implements OutOfPlaceVectorSorter<V> {
+
+  protected IndexSorter<V> indexSorter = new IndexSorter<>();
+
+  @Override
+  public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator<V> comparator) {
+    comparator.attachVector(srcVector);
+
+    // buffers referenced in the sort
+    ArrowBuf srcValueBuffer = srcVector.getDataBuffer();
+    ArrowBuf srcOffsetBuffer = srcVector.getOffsetBuffer();
+    ArrowBuf dstValidityBuffer = dstVector.getValidityBuffer();
+    ArrowBuf dstValueBuffer = dstVector.getDataBuffer();
+    ArrowBuf dstOffsetBuffer = dstVector.getOffsetBuffer();
+
+    // sort value indices
+    try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) {
+      sortedIndices.allocateNew(srcVector.getValueCount());
+      sortedIndices.setValueCount(srcVector.getValueCount());
+      indexSorter.sort(srcVector, sortedIndices, comparator);
+
+      int dstOffset = 0;
+      dstOffsetBuffer.setInt(0, 0);
+
+      // copy sorted values to the output vector
+      for (int dstIndex = 0; dstIndex < sortedIndices.getValueCount(); dstIndex++) {
+        int srcIndex = sortedIndices.get(dstIndex);
+        if (srcVector.isNull(srcIndex)) {
+          BitVectorHelper.setValidityBit(dstValidityBuffer, dstIndex, 0);
+        } else {
+          BitVectorHelper.setValidityBit(dstValidityBuffer, dstIndex, 1);
+          int srcOffset = srcOffsetBuffer.getInt(srcIndex * BaseVariableWidthVector.OFFSET_WIDTH);
+          int valueLength = srcOffsetBuffer.getInt((srcIndex + 1) * BaseVariableWidthVector.OFFSET_WIDTH) - srcOffset;
+          PlatformDependent.copyMemory(
+                  srcValueBuffer.memoryAddress() + srcOffset,
+                  dstValueBuffer.memoryAddress() + dstOffset,
+                  valueLength);
+          dstOffset += valueLength;
+        }
+        dstOffsetBuffer.setInt((dstIndex + 1) * BaseVariableWidthVector.OFFSET_WIDTH, dstOffset);
+      }
+    }
+  }
+}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java
new file mode 100644
index 0000000..6094830
--- /dev/null
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Compare two values at the given indices in the vectors.
+ * This is used for vector sorting.
+ * @param <V> type of the vector.
+ */
+public abstract class VectorValueComparator<V extends ValueVector> {
+
+  /**
+   * The first vector to compare.
+   */
+  protected V vector1;
+
+  /**
+   * The second vector to compare.
+   */
+  protected V vector2;
+
+  /**
+   * Width of the vector value. For variable-length vectors, this value makes no sense.
+   */
+  protected int valueWidth;
+
+  /**
+   * Constructor for variable-width vectors.
+   */
+  protected VectorValueComparator() {
+
+  }
+
+  /**
+   * Constructor for fixed-width vectors.
+   * @param valueWidth the record width (in bytes).
+   */
+  protected VectorValueComparator(int valueWidth) {
+    this.valueWidth = valueWidth;
+  }
+
+  public int getValueWidth() {
+    return valueWidth;
+  }
+
+  /**
+   * Attach both vectors to compare to the same input vector.
+   * @param vector the vector to attach.
+   */
+  public void attachVector(V vector) {
+    this.vector1 = vector;
+    this.vector2 = vector;
+  }
+
+  /**
+   * Attach vectors to compare.
+   * @param vector1 the first vector to compare.
+   * @param vector2 the second vector to compare.
+   */
+  public void attachVectors(V vector1, V vector2) {
+    this.vector1 = vector1;
+    this.vector2 = vector2;
+  }
+
+  /**
+   * Compare two values, given their indices.
+   * @param index1 index of the first value to compare.
+   * @param index2 index of the second value to compare.
+   * @return an integer greater than 0, if the first value is greater;
+   *     an integer smaller than 0, if the first value is smaller; or 0, if both
+   *     values are equal.
+   */
+  public int compare(int index1, int index2) {
+    boolean isNull1 = vector1.isNull(index1);
+    boolean isNull2 = vector2.isNull(index2);
+
+    if (isNull1 || isNull2) {
+      if (isNull1 && isNull2) {
+        return 0;
+      } else if (isNull1) {
+        // null is smaller
+        return -1;
+      } else {
+        return 1;
+      }
+    }
+    return compareNotNull(index1, index2);
+  }
+
+  /**
+   * Compare two values, given their indices.
+   * This is a fast path for comparing non-null values, so the caller
+   * must make sure that values at both indices are not null.
+   * @param index1 index of the first value to compare.
+   * @param index2 index of the second value to compare.
+   * @return an integer greater than 0, if the first value is greater;
+   *     an integer smaller than 0, if the first value is smaller; or 0, if both
+   *     values are equal.
+   */
+  public abstract int compareNotNull(int index1, int index2);
+}
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java
new file mode 100644
index 0000000..9133ab6
--- /dev/null
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import static org.junit.Assert.assertTrue;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link FixedWidthOutOfPlaceVectorSorter}.
+ */
+public class TestFixedWidthOutOfPlaceVectorSorter {
+
+  private BufferAllocator allocator;
+
+  @Before
+  public void prepare() {
+    allocator = new RootAllocator(1024 * 1024);
+  }
+
+  @After
+  public void shutdown() {
+    allocator.close();
+  }
+
+  @Test
+  public void testSortInt() {
+    try (IntVector vec = new IntVector("", allocator)) {
+      vec.allocateNew(10);
+      vec.setValueCount(10);
+
+      // fill data to sort
+      vec.set(0, 10);
+      vec.set(1, 8);
+      vec.setNull(2);
+      vec.set(3, 10);
+      vec.set(4, 12);
+      vec.set(5, 17);
+      vec.setNull(6);
+      vec.set(7, 23);
+      vec.set(8, 35);
+      vec.set(9, 2);
+
+      // sort the vector
+      FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter();
+      DefaultVectorComparators.IntComparator comparator = new DefaultVectorComparators.IntComparator();
+
+      IntVector sortedVec = (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
+      sortedVec.allocateNew(vec.getValueCount());
+      sortedVec.setValueCount(vec.getValueCount());
+
+      sorter.sortOutOfPlace(vec, sortedVec, comparator);
+
+      // verify results
+      Assert.assertEquals(vec.getValueCount(), sortedVec.getValueCount());
+
+      assertTrue(sortedVec.isNull(0));
+      assertTrue(sortedVec.isNull(1));
+      Assert.assertEquals(2, sortedVec.get(2));
+      Assert.assertEquals(8, sortedVec.get(3));
+      Assert.assertEquals(10, sortedVec.get(4));
+      Assert.assertEquals(10, sortedVec.get(5));
+      Assert.assertEquals(12, sortedVec.get(6));
+      Assert.assertEquals(17, sortedVec.get(7));
+      Assert.assertEquals(23, sortedVec.get(8));
+      Assert.assertEquals(35, sortedVec.get(9));
+
+      sortedVec.close();
+    }
+  }
+}
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java
new file mode 100644
index 0000000..072e58e
--- /dev/null
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link IndexSorter}.
+ */
+public class TestIndexSorter {
+
+  private BufferAllocator allocator;
+
+  @Before
+  public void prepare() {
+    allocator = new RootAllocator(1024 * 1024);
+  }
+
+  @After
+  public void shutdown() {
+    allocator.close();
+  }
+
+  @Test
+  public void testIndexSort() {
+    try (IntVector vec = new IntVector("", allocator)) {
+      vec.allocateNew(10);
+      vec.setValueCount(10);
+
+      // fill data to sort
+      vec.set(0, 11);
+      vec.set(1, 8);
+      vec.set(2, 33);
+      vec.set(3, 10);
+      vec.set(4, 12);
+      vec.set(5, 17);
+      vec.setNull(6);
+      vec.set(7, 23);
+      vec.set(8, 35);
+      vec.set(9, 2);
+
+      // sort the index
+      IndexSorter<IntVector> indexSorter = new IndexSorter<>();
+      DefaultVectorComparators.IntComparator intComparator = new DefaultVectorComparators.IntComparator();
+      intComparator.attachVector(vec);
+
+      IntVector indices = new IntVector("", allocator);
+      indices.setValueCount(10);
+      indexSorter.sort(vec, indices, intComparator);
+
+      int[] expected = new int[]{6, 9, 1, 3, 0, 4, 5, 7, 2, 8};
+
+      for (int i = 0; i < expected.length; i++) {
+        assertTrue(!indices.isNull(i));
+        assertEquals(expected[i], indices.get(i));
+      }
+      indices.close();
+    }
+  }
+}
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java
new file mode 100644
index 0000000..68be254
--- /dev/null
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.algorithm.sort;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.VarCharVector;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link VariableWidthOutOfPlaceVectorSorter}.
+ */
+public class TestVariableWidthOutOfPlaceVectorSorter {
+
+  private BufferAllocator allocator;
+
+  @Before
+  public void prepare() {
+    allocator = new RootAllocator(1024 * 1024);
+  }
+
+  @After
+  public void shutdown() {
+    allocator.close();
+  }
+
+  @Test
+  public void testSortString() {
+    try (VarCharVector vec = new VarCharVector("", allocator)) {
+      vec.allocateNew(100, 10);
+      vec.setValueCount(10);
+
+      // fill data to sort
+      vec.set(0, "hello".getBytes());
+      vec.set(1, "abc".getBytes());
+      vec.setNull(2);
+      vec.set(3, "world".getBytes());
+      vec.set(4, "12".getBytes());
+      vec.set(5, "dictionary".getBytes());
+      vec.setNull(6);
+      vec.set(7, "hello".getBytes());
+      vec.set(8, "good".getBytes());
+      vec.set(9, "yes".getBytes());
+
+      // sort the vector
+      VariableWidthOutOfPlaceVectorSorter sorter = new VariableWidthOutOfPlaceVectorSorter();
+      DefaultVectorComparators.VarCharComparator comparator = new DefaultVectorComparators.VarCharComparator();
+
+      VarCharVector sortedVec =
+              (VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null);
+      sortedVec.allocateNew(vec.getByteCapacity(), vec.getValueCount());
+      sortedVec.setLastSet(vec.getValueCount() - 1);
+      sortedVec.setValueCount(vec.getValueCount());
+
+      sorter.sortOutOfPlace(vec, sortedVec, comparator);
+
+      // verify results
+      Assert.assertEquals(vec.getValueCount(), sortedVec.getValueCount());
+      Assert.assertEquals(vec.getByteCapacity(), sortedVec.getByteCapacity());
+      Assert.assertEquals(vec.getLastSet(), sortedVec.getLastSet());
+
+      assertTrue(sortedVec.isNull(0));
+      assertTrue(sortedVec.isNull(1));
+      assertEquals("12", new String(sortedVec.get(2)));
+      assertEquals("abc", new String(sortedVec.get(3)));
+      assertEquals("dictionary", new String(sortedVec.get(4)));
+      assertEquals("good", new String(sortedVec.get(5)));
+      assertEquals("hello", new String(sortedVec.get(6)));
+      assertEquals("hello", new String(sortedVec.get(7)));
+      assertEquals("world", new String(sortedVec.get(8)));
+      assertEquals("yes", new String(sortedVec.get(9)));
+
+      sortedVec.close();
+    }
+  }
+}
diff --git a/java/pom.xml b/java/pom.xml
index 385d391..540b41b 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -654,6 +654,7 @@
     <module>plasma</module>
     <module>flight</module>
     <module>performance</module>
+    <module>algorithm</module>
   </modules>
 
   <profiles>