You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2021/11/23 12:13:48 UTC

[lucene] branch main updated: Javadocs, Sorter impls (#426)

This is an automated email from the ASF dual-hosted git repository.

dsmiley pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 0fcf9c8  Javadocs, Sorter impls (#426)
0fcf9c8 is described below

commit 0fcf9c825f1f76588040f592d7f893899cc715cd
Author: David Smiley <ds...@apache.org>
AuthorDate: Tue Nov 23 07:13:40 2021 -0500

    Javadocs, Sorter impls (#426)
    
    * Javadocs, Sorter impls
    * clarify which sorts are stable/not
    * link from utility methods to the primary Sorter implementations for further information
    * describe when InPlaceMergeSorter is useful.  Fix incorrect statement that is uses insertion sort.
    
    * Javadocs for Sorter
---
 .../src/java/org/apache/lucene/util/ArrayUtil.java   | 12 ++++++++++++
 .../java/org/apache/lucene/util/CollectionUtil.java  |  4 ++++
 .../org/apache/lucene/util/InPlaceMergeSorter.java   |  6 +++++-
 .../src/java/org/apache/lucene/util/IntroSorter.java |  5 +++--
 .../java/org/apache/lucene/util/MSBRadixSorter.java  |  3 ++-
 .../core/src/java/org/apache/lucene/util/Sorter.java | 20 +++++++++++++++++---
 .../src/java/org/apache/lucene/util/TimSorter.java   |  6 +++---
 7 files changed, 46 insertions(+), 10 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java b/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java
index 100c878..851ff76 100644
--- a/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java
@@ -432,6 +432,7 @@ public final class ArrayUtil {
    * Sorts the given array slice using the {@link Comparator}. This method uses the intro sort
    * algorithm, but falls back to insertion sort for small arrays.
    *
+   * @see IntroSorter
    * @param fromIndex start index (inclusive)
    * @param toIndex end index (exclusive)
    */
@@ -443,6 +444,8 @@ public final class ArrayUtil {
   /**
    * Sorts the given array using the {@link Comparator}. This method uses the intro sort algorithm,
    * but falls back to insertion sort for small arrays.
+   *
+   * @see IntroSorter
    */
   public static <T> void introSort(T[] a, Comparator<? super T> comp) {
     introSort(a, 0, a.length, comp);
@@ -452,6 +455,7 @@ public final class ArrayUtil {
    * Sorts the given array slice in natural order. This method uses the intro sort algorithm, but
    * falls back to insertion sort for small arrays.
    *
+   * @see IntroSorter
    * @param fromIndex start index (inclusive)
    * @param toIndex end index (exclusive)
    */
@@ -464,6 +468,8 @@ public final class ArrayUtil {
   /**
    * Sorts the given array in natural order. This method uses the intro sort algorithm, but falls
    * back to insertion sort for small arrays.
+   *
+   * @see IntroSorter
    */
   public static <T extends Comparable<? super T>> void introSort(T[] a) {
     introSort(a, 0, a.length);
@@ -475,6 +481,7 @@ public final class ArrayUtil {
    * Sorts the given array slice using the {@link Comparator}. This method uses the Tim sort
    * algorithm, but falls back to binary sort for small arrays.
    *
+   * @see TimSorter
    * @param fromIndex start index (inclusive)
    * @param toIndex end index (exclusive)
    */
@@ -486,6 +493,8 @@ public final class ArrayUtil {
   /**
    * Sorts the given array using the {@link Comparator}. This method uses the Tim sort algorithm,
    * but falls back to binary sort for small arrays.
+   *
+   * @see TimSorter
    */
   public static <T> void timSort(T[] a, Comparator<? super T> comp) {
     timSort(a, 0, a.length, comp);
@@ -495,6 +504,7 @@ public final class ArrayUtil {
    * Sorts the given array slice in natural order. This method uses the Tim sort algorithm, but
    * falls back to binary sort for small arrays.
    *
+   * @see TimSorter
    * @param fromIndex start index (inclusive)
    * @param toIndex end index (exclusive)
    */
@@ -506,6 +516,8 @@ public final class ArrayUtil {
   /**
    * Sorts the given array in natural order. This method uses the Tim sort algorithm, but falls back
    * to binary sort for small arrays.
+   *
+   * @see TimSorter
    */
   public static <T extends Comparable<? super T>> void timSort(T[] a) {
     timSort(a, 0, a.length);
diff --git a/lucene/core/src/java/org/apache/lucene/util/CollectionUtil.java b/lucene/core/src/java/org/apache/lucene/util/CollectionUtil.java
index 31e4469..bc1eab3 100644
--- a/lucene/core/src/java/org/apache/lucene/util/CollectionUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/util/CollectionUtil.java
@@ -127,6 +127,7 @@ public final class CollectionUtil {
    * implement {@link RandomAccess}. This method uses the intro sort algorithm, but falls back to
    * insertion sort for small lists.
    *
+   * @see IntroSorter
    * @throws IllegalArgumentException if list is e.g. a linked list without random access.
    */
   public static <T> void introSort(List<T> list, Comparator<? super T> comp) {
@@ -140,6 +141,7 @@ public final class CollectionUtil {
    * RandomAccess}. This method uses the intro sort algorithm, but falls back to insertion sort for
    * small lists.
    *
+   * @see IntroSorter
    * @throws IllegalArgumentException if list is e.g. a linked list without random access.
    */
   public static <T extends Comparable<? super T>> void introSort(List<T> list) {
@@ -155,6 +157,7 @@ public final class CollectionUtil {
    * implement {@link RandomAccess}. This method uses the Tim sort algorithm, but falls back to
    * binary sort for small lists.
    *
+   * @see TimSorter
    * @throws IllegalArgumentException if list is e.g. a linked list without random access.
    */
   public static <T> void timSort(List<T> list, Comparator<? super T> comp) {
@@ -168,6 +171,7 @@ public final class CollectionUtil {
    * RandomAccess}. This method uses the Tim sort algorithm, but falls back to binary sort for small
    * lists.
    *
+   * @see TimSorter
    * @throws IllegalArgumentException if list is e.g. a linked list without random access.
    */
   public static <T extends Comparable<? super T>> void timSort(List<T> list) {
diff --git a/lucene/core/src/java/org/apache/lucene/util/InPlaceMergeSorter.java b/lucene/core/src/java/org/apache/lucene/util/InPlaceMergeSorter.java
index d6ba262..e643311 100644
--- a/lucene/core/src/java/org/apache/lucene/util/InPlaceMergeSorter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/InPlaceMergeSorter.java
@@ -18,7 +18,11 @@ package org.apache.lucene.util;
 
 /**
  * {@link Sorter} implementation based on the merge-sort algorithm that merges in place (no extra
- * memory will be allocated). Small arrays are sorted with insertion sort.
+ * memory will be allocated). Small arrays are sorted with binary sort.
+ *
+ * <p>This algorithm is stable. It's especially suited to sorting small lists where we'd rather
+ * optimize for avoiding allocating memory for this task. It performs well on lists that are already
+ * sorted.
  *
  * @lucene.internal
  */
diff --git a/lucene/core/src/java/org/apache/lucene/util/IntroSorter.java b/lucene/core/src/java/org/apache/lucene/util/IntroSorter.java
index dbd003f..4664ab6 100644
--- a/lucene/core/src/java/org/apache/lucene/util/IntroSorter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/IntroSorter.java
@@ -24,8 +24,9 @@ package org.apache.lucene.util;
  * median-of-medians, and partitions using Bentley-McIlroy 3-way partitioning. Small ranges are
  * sorted with insertion sort.
  *
- * <p>This sort algorithm is fast on most data shapes, especially with low cardinality. If the data
- * to sort is known to be strictly ascending or descending, prefer {@link TimSorter}.
+ * <p>This algorithm is <b>NOT</b> stable. It's fast on most data shapes, especially with low
+ * cardinality. If the data to sort is known to be strictly ascending or descending, prefer {@link
+ * TimSorter}.
  *
  * @lucene.internal
  */
diff --git a/lucene/core/src/java/org/apache/lucene/util/MSBRadixSorter.java b/lucene/core/src/java/org/apache/lucene/util/MSBRadixSorter.java
index b81bc97..ebfc310 100644
--- a/lucene/core/src/java/org/apache/lucene/util/MSBRadixSorter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/MSBRadixSorter.java
@@ -21,7 +21,8 @@ import java.util.Arrays;
 /**
  * Radix sorter for variable-length strings. This class sorts based on the most significant byte
  * first and falls back to {@link IntroSorter} when the size of the buckets to sort becomes small.
- * It is <b>NOT</b> stable. Worst-case memory usage is about {@code 2.3 KB}.
+ *
+ * <p>This algorithm is <b>NOT</b> stable. Worst-case memory usage is about {@code 2.3 KB}.
  *
  * @lucene.internal
  */
diff --git a/lucene/core/src/java/org/apache/lucene/util/Sorter.java b/lucene/core/src/java/org/apache/lucene/util/Sorter.java
index f0d2fc9..ce96cb2 100644
--- a/lucene/core/src/java/org/apache/lucene/util/Sorter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/Sorter.java
@@ -21,6 +21,18 @@ import java.util.Comparator;
 /**
  * Base class for sorting algorithms implementations.
  *
+ * <p>There are a number of subclasses to choose from that vary in performance and <a
+ * href="https://en.wikipedia.org/wiki/Sorting_algorithm#Stability">stability</a>. We suggest that
+ * you pick the first from this ranked list that meets your requirements:
+ *
+ * <ol>
+ *   <li>{@link MSBRadixSorter} for strings (array of bytes/chars). Not a stable sort.
+ *   <li>{@link StableMSBRadixSorter} for strings (array of bytes/chars). Stable sort.
+ *   <li>{@link IntroSorter}. Not a stable sort.
+ *   <li>{@link InPlaceMergeSorter}. When the data to sort is typically small. Stable sort.
+ *   <li>{@link TimSorter}. Stable sort.
+ * </ol>
+ *
  * @lucene.internal
  */
 public abstract class Sorter {
@@ -193,7 +205,8 @@ public abstract class Sorter {
   /**
    * A binary sort implementation. This performs {@code O(n*log(n))} comparisons and {@code O(n^2)}
    * swaps. It is typically used by more sophisticated implementations as a fall-back when the
-   * number of items to sort has become less than {@value #BINARY_SORT_THRESHOLD}.
+   * number of items to sort has become less than {@value #BINARY_SORT_THRESHOLD}. This algorithm is
+   * stable.
    */
   void binarySort(int from, int to) {
     binarySort(from, to, from + 1);
@@ -222,7 +235,7 @@ public abstract class Sorter {
   /**
    * Sorts between from (inclusive) and to (exclusive) with insertion sort. Runs in {@code O(n^2)}.
    * It is typically used by more sophisticated implementations as a fall-back when the number of
-   * items to sort becomes less than {@value #INSERTION_SORT_THRESHOLD}.
+   * items to sort becomes less than {@value #INSERTION_SORT_THRESHOLD}. This algorithm is stable.
    */
   void insertionSort(int from, int to) {
     for (int i = from + 1; i < to; ) {
@@ -240,7 +253,8 @@ public abstract class Sorter {
 
   /**
    * Use heap sort to sort items between {@code from} inclusive and {@code to} exclusive. This runs
-   * in {@code O(n*log(n))} and is used as a fall-back by {@link IntroSorter}.
+   * in {@code O(n*log(n))} and is used as a fall-back by {@link IntroSorter}. This algorithm is NOT
+   * stable.
    */
   void heapSort(int from, int to) {
     if (to - from <= 1) {
diff --git a/lucene/core/src/java/org/apache/lucene/util/TimSorter.java b/lucene/core/src/java/org/apache/lucene/util/TimSorter.java
index f3f32c2..e4dca24 100644
--- a/lucene/core/src/java/org/apache/lucene/util/TimSorter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/TimSorter.java
@@ -20,10 +20,10 @@ import java.util.Arrays;
 
 /**
  * {@link Sorter} implementation based on the <a
- * href="http://svn.python.org/projects/python/trunk/Objects/listsort.txt">TimSort</a> algorithm.
+ * href="http://svn.python.org/projects/python/trunk/Objects/listsort.txt">TimSort</a> algorithm. It
+ * sorts small arrays with a binary sort.
  *
- * <p>This implementation is especially good at sorting partially-sorted arrays and sorts small
- * arrays with binary sort.
+ * <p>This algorithm is stable. It's especially good at sorting partially-sorted arrays.
  *
  * <p><b>NOTE</b>:There are a few differences with the original implementation:
  *