You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by ma...@apache.org on 2009/09/16 01:26:39 UTC

svn commit: r815563 - in /lucene/lucy/trunk/core/Lucy/Util: SortUtils.bp SortUtils.c

Author: marvin
Date: Tue Sep 15 23:26:38 2009
New Revision: 815563

URL: http://svn.apache.org/viewvc?rev=815563&view=rev
Log:
Commit LUCY-49, adding context-aware mergesort and quicksort implementations.

Added:
    lucene/lucy/trunk/core/Lucy/Util/SortUtils.bp   (with props)
    lucene/lucy/trunk/core/Lucy/Util/SortUtils.c   (with props)

Added: lucene/lucy/trunk/core/Lucy/Util/SortUtils.bp
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Util/SortUtils.bp?rev=815563&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Util/SortUtils.bp (added)
+++ lucene/lucy/trunk/core/Lucy/Util/SortUtils.bp Tue Sep 15 23:26:38 2009
@@ -0,0 +1,83 @@
+parcel Lucy;
+
+__C__
+typedef int 
+(*lucy_Sort_compare_t)(void *context, const void *va, const void *vb);
+#ifdef LUCY_USE_SHORT_NAMES
+  #define Sort_compare_t            lucy_Sort_compare_t
+#endif
+__END_C__
+
+/** Specialized sorting routines.
+ * 
+ * SortUtils provides a merge sort algorithm which allows access to its
+ * internals, enabling specialized functions to jump in and only execute part
+ * of the sort.
+ * 
+ * SortUtils also provides a quicksort with an additional context argument.
+ */
+inert class Lucy::Util::SortUtils cnick Sort {
+
+    /** Perform a mergesort.  In addition to providing a contiguous array of
+     * elements to be sorted and their count, the caller must also provide a
+     * scratch buffer with room for at least as many elements as are to be
+     * sorted.
+     */
+    inert void
+    mergesort(void *elems, void *scratch, u32_t num_elems, u32_t width,
+              Sort_compare_t compare, void *context);
+
+    /** Standard mergesort function.
+     */
+    inert void
+    do_sort(void *elems, void *scratch, u32_t left, u32_t right,
+            Sort_compare_t compare, void *context);
+
+    /** Merge two source arrays together using the classic mergesort merge
+     * algorithm, storing the result in <code>dest</code>.
+     * 
+     * Most merge functions operate on a single contiguous array and copy the
+     * merged results results back into the source array before returning.
+     * These two differ in that it is possible to operate on two discontiguous
+     * source arrays.  Copying the results back into the source array is the
+     * responsibility of the caller.
+     * 
+     * Lucy's external sort takes advantage of this when it is reading
+     * back pre-sorted runs from disk and merging the streams into a
+     * consolidated buffer.
+     * 
+     * merge4 merges elements which are 4 bytes in size; merge8 merges 8-byte
+     * elements.
+     */
+    inert void
+    merge4(void *left_ptr,  u32_t left_num_elems,
+           void *right_ptr, u32_t right_num_elems,
+           void *dest, Sort_compare_t compare, void *context);
+    inert void
+    merge8(void *left_ptr,  u32_t left_num_elems,
+           void *right_ptr, u32_t right_num_elems,
+           void *dest, Sort_compare_t compare, void *context);
+
+
+    /** Quicksort. 
+     */
+    inert void
+    quicksort(void *elems, size_t num_elems, size_t width, 
+              Sort_compare_t compare, void *context);
+}
+
+/* Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+

Propchange: lucene/lucy/trunk/core/Lucy/Util/SortUtils.bp
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/lucy/trunk/core/Lucy/Util/SortUtils.c
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Util/SortUtils.c?rev=815563&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Util/SortUtils.c (added)
+++ lucene/lucy/trunk/core/Lucy/Util/SortUtils.c Tue Sep 15 23:26:38 2009
@@ -0,0 +1,454 @@
+#define C_LUCY_SORTUTILS
+#define LUCY_USE_SHORT_NAMES
+#define CHY_USE_SHORT_NAMES
+
+#include <string.h>
+#include "Lucy/Util/SortUtils.h"
+#include "Lucy/Object/Err.h"
+
+/* Define four-byte and eight-byte types so that we can dereference void
+ * pointers like integer pointers.  The only significance of using i32_t and
+ * i64_t is that they are 4 and 8 bytes.
+ */
+#define FOUR_BYTE_TYPE  i32_t
+#define EIGHT_BYTE_TYPE i64_t
+
+/***************************** mergesort ************************************/
+
+/* Recursive merge sorting functions. 
+ */
+static void
+S_msort4(FOUR_BYTE_TYPE *elems, FOUR_BYTE_TYPE *scratch,
+         u32_t left, u32_t right, Sort_compare_t compare, void *context);
+static void
+S_msort8(EIGHT_BYTE_TYPE *elems, EIGHT_BYTE_TYPE *scratch,
+         u32_t left, u32_t right, Sort_compare_t compare, void *context);
+
+/* Static inline versions of Sort_merge4 and Sort_merge8.
+ */
+static INLINE void
+SI_merge4(FOUR_BYTE_TYPE *left_ptr,  u32_t left_size,
+          FOUR_BYTE_TYPE *right_ptr, u32_t right_size,
+          FOUR_BYTE_TYPE *dest, Sort_compare_t compare, void *context);
+static INLINE void
+SI_merge8(EIGHT_BYTE_TYPE *left_ptr,  u32_t left_size,
+          EIGHT_BYTE_TYPE *right_ptr, u32_t right_size,
+          EIGHT_BYTE_TYPE *dest, Sort_compare_t compare, void *context);
+
+void
+Sort_mergesort(void *elems, void *scratch, u32_t num_elems, u32_t width,
+               Sort_compare_t compare, void *context) 
+{
+    /* Arrays of 0 or 1 items are already sorted. */
+    if (num_elems < 2) { return; }
+
+    /* Validate. */
+    if (num_elems >= I32_MAX) {
+        THROW(ERR, "Provided %u64 elems, but can't handle more than %i32",
+            (u64_t)num_elems, I32_MAX);
+    }
+
+    /* Dispatch by element size. */
+    if (width == 4) {
+        S_msort4(elems, scratch, 0, num_elems - 1, compare, context);
+    }
+    else if (width == 8) {
+        S_msort8(elems, scratch, 0, num_elems - 1, compare, context);
+    }
+    else {
+        THROW(ERR, "Can't sort elements which are %u32 bytes", width);
+    }
+}
+
+void
+Sort_merge4(void *left_ptr,  u32_t left_size,
+            void *right_ptr, u32_t right_size,
+            void *dest, Sort_compare_t compare, void *context) 
+{
+    SI_merge4(left_ptr, left_size, right_ptr, right_size, dest, compare,
+        context);
+}
+
+void
+Sort_merge8(void *left_ptr,  u32_t left_size,
+            void *right_ptr, u32_t right_size,
+            void *dest, Sort_compare_t compare, void *context) 
+{
+    SI_merge8(left_ptr, left_size, right_ptr, right_size, dest, compare,
+        context);
+}
+
+static void
+S_msort4(FOUR_BYTE_TYPE *elems, FOUR_BYTE_TYPE *scratch,
+         u32_t left, u32_t right, Sort_compare_t compare, void *context)
+{
+    if (right > left) {
+        const u32_t mid = ( (right+left)/2 ) + 1;
+        S_msort4(elems, scratch, left, mid - 1, compare, context);
+        S_msort4(elems, scratch, mid,  right, compare, context);
+        Sort_merge4( (elems + left),  (mid - left), 
+            (elems + mid), (right - mid + 1), scratch, compare, context);
+        memcpy((elems + left), scratch,
+            ((right - left + 1) * sizeof(FOUR_BYTE_TYPE)) );
+    }
+}
+
+static void
+S_msort8(EIGHT_BYTE_TYPE *elems, EIGHT_BYTE_TYPE *scratch,
+         u32_t left, u32_t right, Sort_compare_t compare, void *context)
+{
+    if (right > left) {
+        const u32_t mid = ( (right+left)/2 ) + 1;
+        S_msort8(elems, scratch, left, mid - 1, compare, context);
+        S_msort8(elems, scratch, mid,  right, compare, context);
+        Sort_merge8( (elems + left),  (mid - left), 
+            (elems + mid), (right - mid + 1), scratch, compare, context);
+        memcpy((elems + left), scratch,
+            ((right - left + 1) * sizeof(EIGHT_BYTE_TYPE)) );
+    }
+}
+
+static INLINE void
+SI_merge4(FOUR_BYTE_TYPE *left_ptr,  u32_t left_size,
+          FOUR_BYTE_TYPE *right_ptr, u32_t right_size,
+          FOUR_BYTE_TYPE *dest, Sort_compare_t compare, void *context) 
+{
+    FOUR_BYTE_TYPE *left_limit  = left_ptr  + left_size;
+    FOUR_BYTE_TYPE *right_limit = right_ptr + right_size;
+
+    while (left_ptr < left_limit && right_ptr < right_limit) {
+        if (compare(context, left_ptr, right_ptr) < 1) {
+            *dest++ = *left_ptr++;
+        }
+        else {
+            *dest++ = *right_ptr++;
+        }
+    }
+    while (left_ptr < left_limit) {
+        *dest++ = *left_ptr++;
+    }
+    while (right_ptr < right_limit) {
+        *dest++ = *right_ptr++;
+    }
+}
+
+static INLINE void
+SI_merge8(EIGHT_BYTE_TYPE *left_ptr,  u32_t left_size,
+          EIGHT_BYTE_TYPE *right_ptr, u32_t right_size,
+          EIGHT_BYTE_TYPE *dest, Sort_compare_t compare, void *context)
+{
+    EIGHT_BYTE_TYPE *left_limit  = left_ptr  + left_size;
+    EIGHT_BYTE_TYPE *right_limit = right_ptr + right_size;
+
+    while (left_ptr < left_limit && right_ptr < right_limit) {
+        if (compare(context, left_ptr, right_ptr) < 1) {
+            *dest++ = *left_ptr++;
+        }
+        else {
+            *dest++ = *right_ptr++;
+        }
+    }
+    while (left_ptr < left_limit) {
+        *dest++ = *left_ptr++;
+    }
+    while (right_ptr < right_limit) {
+        *dest++ = *right_ptr++;
+    }
+}
+
+/***************************** quicksort ************************************/
+
+/* Quicksort implementations optimized for four-byte and eight-byte elements.
+ */
+static void 
+S_qsort4(FOUR_BYTE_TYPE *elems, i32_t left, i32_t right,
+         Sort_compare_t compare, void *context);
+static void 
+S_qsort8(EIGHT_BYTE_TYPE *elems, i32_t left, i32_t right,
+         Sort_compare_t compare, void *context);
+
+/* Swap two elements. 
+ */
+static INLINE void
+SI_exchange4(FOUR_BYTE_TYPE *elems, i32_t left, i32_t right);
+static INLINE void
+SI_exchange8(EIGHT_BYTE_TYPE *elems, i32_t left, i32_t right);
+
+/* Select a pivot by choosing the median of three values, guarding against
+ * the worst-case behavior of quicksort.  Place the pivot in the rightmost
+ * slot.
+ *
+ * Possible states:
+ *
+ *   abc => abc => abc => acb
+ *   acb => acb => acb => acb
+ *   bac => abc => abc => acb
+ *   bca => bca => acb => acb
+ *   cba => bca => acb => acb
+ *   cab => acb => acb => acb
+ *   aab => aab => aab => aba
+ *   aba => aba => aba => aba
+ *   baa => aba => aba => aba
+ *   bba => bba => abb => abb
+ *   bab => abb => abb => abb
+ *   abb => abb => abb => abb
+ *   aaa => aaa => aaa => aaa
+ */
+static INLINE FOUR_BYTE_TYPE*
+SI_choose_pivot4(FOUR_BYTE_TYPE *elems, i32_t left, i32_t right,
+                 Sort_compare_t compare, void *context);
+static INLINE EIGHT_BYTE_TYPE*
+SI_choose_pivot8(EIGHT_BYTE_TYPE *elems, i32_t left, i32_t right,
+                 Sort_compare_t compare, void *context);
+
+void
+Sort_quicksort(void *elems, size_t num_elems, size_t width, 
+               Sort_compare_t compare, void *context)
+{
+    /* Arrays of 0 or 1 items are already sorted. */
+    if (num_elems < 2) { return; }
+
+    /* Validate. */
+    if (num_elems >= I32_MAX) {
+        THROW(ERR, "Provided %u64 elems, but can't handle more than %i32",
+            (u64_t)num_elems, I32_MAX);
+    }
+
+    if (width == 4) { 
+        S_qsort4(elems, 0, num_elems - 1, compare, context); 
+    }
+    else if (width == 8) { 
+        S_qsort8(elems, 0, num_elems - 1, compare, context);
+    }
+    else {
+        THROW(ERR, "Unsupported width: %i64", (i64_t)width);
+    }
+}
+
+/************************* quicksort 4 byte *********************************/
+
+static INLINE void
+SI_exchange4(FOUR_BYTE_TYPE *elems, i32_t left, i32_t right)
+{
+    FOUR_BYTE_TYPE saved = elems[left];
+    elems[left]  = elems[right];
+    elems[right] = saved;
+}
+
+static INLINE FOUR_BYTE_TYPE*
+SI_choose_pivot4(FOUR_BYTE_TYPE *elems, i32_t left, i32_t right,
+                 Sort_compare_t compare, void *context)
+{
+    if (right - left > 1) { 
+        i32_t mid = left + (right - left) / 2;
+        if (compare(context, elems + left, elems + mid) > 0) {
+            SI_exchange4(elems, left, mid);
+        }
+        if (compare(context, elems + left, elems + right) > 0) {
+            SI_exchange4(elems, left, right);
+        }
+        if (compare(context, elems + right, elems + mid) > 0) {
+            SI_exchange4(elems, right, mid);
+        }
+    }
+    return elems + right;
+}
+
+static void 
+S_qsort4(FOUR_BYTE_TYPE *elems, i32_t left, i32_t right,
+         Sort_compare_t compare, void *context)
+{ 
+    FOUR_BYTE_TYPE *const pivot 
+        = SI_choose_pivot4(elems, left, right, compare, context);
+    i32_t i = left - 1;
+    i32_t j = right; 
+    i32_t p = left - 1;
+    i32_t q = right; 
+
+    if (right <= left) { return; }
+
+    while (1) {
+        int comparison1;
+        int comparison2;
+
+        /* Find an element from the left that is greater than or equal to the
+         * pivot (i.e. that should move to the right). */
+        while (1) {
+            i++;
+            comparison1 = compare(context, elems + i, pivot);
+            if (comparison1 >= 0) { break; }
+        }
+
+        /* Find an element from the right that is less than or equal to the
+         * pivot (i.e. that should move to the left). */
+        while (1) {
+            j--;
+            comparison2 = compare(context, elems + j, pivot);
+            if (comparison2 <= 0) { break; }
+            if (j == left)         { break; }
+        }
+
+        /* Bail out of loop when we meet in the middle. */
+        if (i >= j) { break; }
+
+        /* Swap the elements we found, so the lesser element moves left and
+         * the greater element moves right. */
+        SI_exchange4(elems, i, j);
+
+        /* Move any elements which test as "equal" to the pivot to the outside
+         * edges of the array. */
+        if (comparison2 == 0) {
+            p++;
+            SI_exchange4(elems, p, i);
+        }
+        if (comparison1 == 0) {
+            q--;
+            SI_exchange4(elems, j, q);
+        }
+    } 
+
+    /* Move "equal" elements from the outside edges to the center. 
+     * 
+     * Before: 
+     * 
+     *    equal  |  less_than  |  greater_than  |  equal
+     * 
+     * After: 
+     * 
+     *    less_than  |       equal       |  greater_than
+     */
+    {
+        i32_t k;
+        SI_exchange4(elems, i, right);
+        j = i - 1;
+        i++;
+        for (k = left; k < p; k++, j--)      { SI_exchange4(elems, k, j); }
+        for (k = right - 1; k > q; k--, i++) { SI_exchange4(elems, i, k); }
+    }
+
+    /* Recurse. */
+    S_qsort4(elems, left, j, compare, context);   /* Sort less_than. */
+    S_qsort4(elems, i, right, compare, context);  /* Sort greater_than. */
+} 
+
+/************************* quicksort 8 byte *********************************/
+
+static INLINE void
+SI_exchange8(EIGHT_BYTE_TYPE *elems, i32_t left, i32_t right)
+{
+    EIGHT_BYTE_TYPE saved = elems[left];
+    elems[left]  = elems[right];
+    elems[right] = saved;
+}
+
+static INLINE EIGHT_BYTE_TYPE*
+SI_choose_pivot8(EIGHT_BYTE_TYPE *elems, i32_t left, i32_t right,
+                 Sort_compare_t compare, void *context)
+{
+    if (right - left > 1) { 
+        i32_t mid = left + (right - left) / 2;
+        if (compare(context, elems + left, elems + mid) > 0) {
+            SI_exchange8(elems, left, mid);
+        }
+        if (compare(context, elems + left, elems + right) > 0) {
+            SI_exchange8(elems, left, right);
+        }
+        if (compare(context, elems + right, elems + mid) > 0) {
+            SI_exchange8(elems, right, mid);
+        }
+    }
+    return elems + right;
+}
+
+static void 
+S_qsort8(EIGHT_BYTE_TYPE *elems, i32_t left, i32_t right,
+         Sort_compare_t compare, void *context)
+{ 
+    EIGHT_BYTE_TYPE *const pivot 
+        = SI_choose_pivot8(elems, left, right, compare, context);
+    i32_t i = left - 1;
+    i32_t j = right; 
+    i32_t p = left - 1;
+    i32_t q = right; 
+
+    if (right <= left) { return; }
+
+    while (1) {
+        int comparison1;
+        int comparison2;
+
+        /* Find an element from the left that is greater than or equal to the
+         * pivot (i.e. that should move to the right). */
+        while (1) {
+            i++;
+            comparison1 = compare(context, elems + i, pivot);
+            if (comparison1 >= 0) { break; }
+        }
+
+        /* Find an element from the right that is less than or equal to the
+         * pivot (i.e. that should move to the left). */
+        while (1) {
+            j--;
+            comparison2 = compare(context, elems + j, pivot);
+            if (comparison2 <= 0) { break; }
+            if (j == left)         { break; }
+        }
+
+        /* Bail out of loop when we meet in the middle. */
+        if (i >= j) { break; }
+
+        /* Swap the elements we found, so the lesser element moves left and
+         * the greater element moves right. */
+        SI_exchange8(elems, i, j);
+
+        /* Move any elements which test as "equal" to the pivot to the outside
+         * edges of the array. */
+        if (comparison2 == 0) {
+            p++;
+            SI_exchange8(elems, p, i);
+        }
+        if (comparison1 == 0) {
+            q--;
+            SI_exchange8(elems, j, q);
+        }
+    } 
+
+    /* Move "equal" elements from the outside edges to the center. 
+     * 
+     * Before: 
+     * 
+     *    equal  |  less_than  |  greater_than  |  equal
+     * 
+     * After: 
+     * 
+     *    less_than  |       equal       |  greater_than
+     */
+    {
+        i32_t k;
+        SI_exchange8(elems, i, right);
+        j = i - 1;
+        i++;
+        for (k = left; k < p; k++, j--)      { SI_exchange8(elems, k, j); }
+        for (k = right - 1; k > q; k--, i++) { SI_exchange8(elems, i, k); }
+    }
+
+    /* Recurse. */
+    S_qsort8(elems, left, j, compare, context);   /* Sort less_than. */
+    S_qsort8(elems, i, right, compare, context);  /* Sort greater_than. */
+} 
+
+/* Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+

Propchange: lucene/lucy/trunk/core/Lucy/Util/SortUtils.c
------------------------------------------------------------------------------
    svn:eol-style = native