You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2021/11/23 12:13:48 UTC
[lucene] branch main updated: Javadocs, Sorter impls (#426)
This is an automated email from the ASF dual-hosted git repository.
dsmiley pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 0fcf9c8 Javadocs, Sorter impls (#426)
0fcf9c8 is described below
commit 0fcf9c825f1f76588040f592d7f893899cc715cd
Author: David Smiley <ds...@apache.org>
AuthorDate: Tue Nov 23 07:13:40 2021 -0500
Javadocs, Sorter impls (#426)
* Javadocs, Sorter impls
* clarify which sorts are stable/not
* link from utility methods to the primary Sorter implementations for further information
* describe when InPlaceMergeSorter is useful. Fix incorrect statement that is uses insertion sort.
* Javadocs for Sorter
---
.../src/java/org/apache/lucene/util/ArrayUtil.java | 12 ++++++++++++
.../java/org/apache/lucene/util/CollectionUtil.java | 4 ++++
.../org/apache/lucene/util/InPlaceMergeSorter.java | 6 +++++-
.../src/java/org/apache/lucene/util/IntroSorter.java | 5 +++--
.../java/org/apache/lucene/util/MSBRadixSorter.java | 3 ++-
.../core/src/java/org/apache/lucene/util/Sorter.java | 20 +++++++++++++++++---
.../src/java/org/apache/lucene/util/TimSorter.java | 6 +++---
7 files changed, 46 insertions(+), 10 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java b/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java
index 100c878..851ff76 100644
--- a/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java
@@ -432,6 +432,7 @@ public final class ArrayUtil {
* Sorts the given array slice using the {@link Comparator}. This method uses the intro sort
* algorithm, but falls back to insertion sort for small arrays.
*
+ * @see IntroSorter
* @param fromIndex start index (inclusive)
* @param toIndex end index (exclusive)
*/
@@ -443,6 +444,8 @@ public final class ArrayUtil {
/**
* Sorts the given array using the {@link Comparator}. This method uses the intro sort algorithm,
* but falls back to insertion sort for small arrays.
+ *
+ * @see IntroSorter
*/
public static <T> void introSort(T[] a, Comparator<? super T> comp) {
introSort(a, 0, a.length, comp);
@@ -452,6 +455,7 @@ public final class ArrayUtil {
* Sorts the given array slice in natural order. This method uses the intro sort algorithm, but
* falls back to insertion sort for small arrays.
*
+ * @see IntroSorter
* @param fromIndex start index (inclusive)
* @param toIndex end index (exclusive)
*/
@@ -464,6 +468,8 @@ public final class ArrayUtil {
/**
* Sorts the given array in natural order. This method uses the intro sort algorithm, but falls
* back to insertion sort for small arrays.
+ *
+ * @see IntroSorter
*/
public static <T extends Comparable<? super T>> void introSort(T[] a) {
introSort(a, 0, a.length);
@@ -475,6 +481,7 @@ public final class ArrayUtil {
* Sorts the given array slice using the {@link Comparator}. This method uses the Tim sort
* algorithm, but falls back to binary sort for small arrays.
*
+ * @see TimSorter
* @param fromIndex start index (inclusive)
* @param toIndex end index (exclusive)
*/
@@ -486,6 +493,8 @@ public final class ArrayUtil {
/**
* Sorts the given array using the {@link Comparator}. This method uses the Tim sort algorithm,
* but falls back to binary sort for small arrays.
+ *
+ * @see TimSorter
*/
public static <T> void timSort(T[] a, Comparator<? super T> comp) {
timSort(a, 0, a.length, comp);
@@ -495,6 +504,7 @@ public final class ArrayUtil {
* Sorts the given array slice in natural order. This method uses the Tim sort algorithm, but
* falls back to binary sort for small arrays.
*
+ * @see TimSorter
* @param fromIndex start index (inclusive)
* @param toIndex end index (exclusive)
*/
@@ -506,6 +516,8 @@ public final class ArrayUtil {
/**
* Sorts the given array in natural order. This method uses the Tim sort algorithm, but falls back
* to binary sort for small arrays.
+ *
+ * @see TimSorter
*/
public static <T extends Comparable<? super T>> void timSort(T[] a) {
timSort(a, 0, a.length);
diff --git a/lucene/core/src/java/org/apache/lucene/util/CollectionUtil.java b/lucene/core/src/java/org/apache/lucene/util/CollectionUtil.java
index 31e4469..bc1eab3 100644
--- a/lucene/core/src/java/org/apache/lucene/util/CollectionUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/util/CollectionUtil.java
@@ -127,6 +127,7 @@ public final class CollectionUtil {
* implement {@link RandomAccess}. This method uses the intro sort algorithm, but falls back to
* insertion sort for small lists.
*
+ * @see IntroSorter
* @throws IllegalArgumentException if list is e.g. a linked list without random access.
*/
public static <T> void introSort(List<T> list, Comparator<? super T> comp) {
@@ -140,6 +141,7 @@ public final class CollectionUtil {
* RandomAccess}. This method uses the intro sort algorithm, but falls back to insertion sort for
* small lists.
*
+ * @see IntroSorter
* @throws IllegalArgumentException if list is e.g. a linked list without random access.
*/
public static <T extends Comparable<? super T>> void introSort(List<T> list) {
@@ -155,6 +157,7 @@ public final class CollectionUtil {
* implement {@link RandomAccess}. This method uses the Tim sort algorithm, but falls back to
* binary sort for small lists.
*
+ * @see TimSorter
* @throws IllegalArgumentException if list is e.g. a linked list without random access.
*/
public static <T> void timSort(List<T> list, Comparator<? super T> comp) {
@@ -168,6 +171,7 @@ public final class CollectionUtil {
* RandomAccess}. This method uses the Tim sort algorithm, but falls back to binary sort for small
* lists.
*
+ * @see TimSorter
* @throws IllegalArgumentException if list is e.g. a linked list without random access.
*/
public static <T extends Comparable<? super T>> void timSort(List<T> list) {
diff --git a/lucene/core/src/java/org/apache/lucene/util/InPlaceMergeSorter.java b/lucene/core/src/java/org/apache/lucene/util/InPlaceMergeSorter.java
index d6ba262..e643311 100644
--- a/lucene/core/src/java/org/apache/lucene/util/InPlaceMergeSorter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/InPlaceMergeSorter.java
@@ -18,7 +18,11 @@ package org.apache.lucene.util;
/**
* {@link Sorter} implementation based on the merge-sort algorithm that merges in place (no extra
- * memory will be allocated). Small arrays are sorted with insertion sort.
+ * memory will be allocated). Small arrays are sorted with binary sort.
+ *
+ * <p>This algorithm is stable. It's especially suited to sorting small lists where we'd rather
+ * optimize for avoiding allocating memory for this task. It performs well on lists that are already
+ * sorted.
*
* @lucene.internal
*/
diff --git a/lucene/core/src/java/org/apache/lucene/util/IntroSorter.java b/lucene/core/src/java/org/apache/lucene/util/IntroSorter.java
index dbd003f..4664ab6 100644
--- a/lucene/core/src/java/org/apache/lucene/util/IntroSorter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/IntroSorter.java
@@ -24,8 +24,9 @@ package org.apache.lucene.util;
* median-of-medians, and partitions using Bentley-McIlroy 3-way partitioning. Small ranges are
* sorted with insertion sort.
*
- * <p>This sort algorithm is fast on most data shapes, especially with low cardinality. If the data
- * to sort is known to be strictly ascending or descending, prefer {@link TimSorter}.
+ * <p>This algorithm is <b>NOT</b> stable. It's fast on most data shapes, especially with low
+ * cardinality. If the data to sort is known to be strictly ascending or descending, prefer {@link
+ * TimSorter}.
*
* @lucene.internal
*/
diff --git a/lucene/core/src/java/org/apache/lucene/util/MSBRadixSorter.java b/lucene/core/src/java/org/apache/lucene/util/MSBRadixSorter.java
index b81bc97..ebfc310 100644
--- a/lucene/core/src/java/org/apache/lucene/util/MSBRadixSorter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/MSBRadixSorter.java
@@ -21,7 +21,8 @@ import java.util.Arrays;
/**
* Radix sorter for variable-length strings. This class sorts based on the most significant byte
* first and falls back to {@link IntroSorter} when the size of the buckets to sort becomes small.
- * It is <b>NOT</b> stable. Worst-case memory usage is about {@code 2.3 KB}.
+ *
+ * <p>This algorithm is <b>NOT</b> stable. Worst-case memory usage is about {@code 2.3 KB}.
*
* @lucene.internal
*/
diff --git a/lucene/core/src/java/org/apache/lucene/util/Sorter.java b/lucene/core/src/java/org/apache/lucene/util/Sorter.java
index f0d2fc9..ce96cb2 100644
--- a/lucene/core/src/java/org/apache/lucene/util/Sorter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/Sorter.java
@@ -21,6 +21,18 @@ import java.util.Comparator;
/**
* Base class for sorting algorithms implementations.
*
+ * <p>There are a number of subclasses to choose from that vary in performance and <a
+ * href="https://en.wikipedia.org/wiki/Sorting_algorithm#Stability">stability</a>. We suggest that
+ * you pick the first from this ranked list that meets your requirements:
+ *
+ * <ol>
+ * <li>{@link MSBRadixSorter} for strings (array of bytes/chars). Not a stable sort.
+ * <li>{@link StableMSBRadixSorter} for strings (array of bytes/chars). Stable sort.
+ * <li>{@link IntroSorter}. Not a stable sort.
+ * <li>{@link InPlaceMergeSorter}. When the data to sort is typically small. Stable sort.
+ * <li>{@link TimSorter}. Stable sort.
+ * </ol>
+ *
* @lucene.internal
*/
public abstract class Sorter {
@@ -193,7 +205,8 @@ public abstract class Sorter {
/**
* A binary sort implementation. This performs {@code O(n*log(n))} comparisons and {@code O(n^2)}
* swaps. It is typically used by more sophisticated implementations as a fall-back when the
- * number of items to sort has become less than {@value #BINARY_SORT_THRESHOLD}.
+ * number of items to sort has become less than {@value #BINARY_SORT_THRESHOLD}. This algorithm is
+ * stable.
*/
void binarySort(int from, int to) {
binarySort(from, to, from + 1);
@@ -222,7 +235,7 @@ public abstract class Sorter {
/**
* Sorts between from (inclusive) and to (exclusive) with insertion sort. Runs in {@code O(n^2)}.
* It is typically used by more sophisticated implementations as a fall-back when the number of
- * items to sort becomes less than {@value #INSERTION_SORT_THRESHOLD}.
+ * items to sort becomes less than {@value #INSERTION_SORT_THRESHOLD}. This algorithm is stable.
*/
void insertionSort(int from, int to) {
for (int i = from + 1; i < to; ) {
@@ -240,7 +253,8 @@ public abstract class Sorter {
/**
* Use heap sort to sort items between {@code from} inclusive and {@code to} exclusive. This runs
- * in {@code O(n*log(n))} and is used as a fall-back by {@link IntroSorter}.
+ * in {@code O(n*log(n))} and is used as a fall-back by {@link IntroSorter}. This algorithm is NOT
+ * stable.
*/
void heapSort(int from, int to) {
if (to - from <= 1) {
diff --git a/lucene/core/src/java/org/apache/lucene/util/TimSorter.java b/lucene/core/src/java/org/apache/lucene/util/TimSorter.java
index f3f32c2..e4dca24 100644
--- a/lucene/core/src/java/org/apache/lucene/util/TimSorter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/TimSorter.java
@@ -20,10 +20,10 @@ import java.util.Arrays;
/**
* {@link Sorter} implementation based on the <a
- * href="http://svn.python.org/projects/python/trunk/Objects/listsort.txt">TimSort</a> algorithm.
+ * href="http://svn.python.org/projects/python/trunk/Objects/listsort.txt">TimSort</a> algorithm. It
+ * sorts small arrays with a binary sort.
*
- * <p>This implementation is especially good at sorting partially-sorted arrays and sorts small
- * arrays with binary sort.
+ * <p>This algorithm is stable. It's especially good at sorting partially-sorted arrays.
*
* <p><b>NOTE</b>:There are a few differences with the original implementation:
*