You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by ma...@apache.org on 2014/07/01 21:35:10 UTC
[5/8] git commit: refs/heads/master - Use counting sort to sort
doc_ids in SortFieldWriter#Refill
Use counting sort to sort doc_ids in SortFieldWriter#Refill
Since we already have the ordinals for each doc_id, we can use a
counting sort. This uses a temporary array of size run_cardinality but
runs in linear time.
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/d96ef18e
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/d96ef18e
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/d96ef18e
Branch: refs/heads/master
Commit: d96ef18e4c8e1ef19ce55be5926a0728254df30d
Parents: dd1a198
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Thu Sep 26 19:43:42 2013 +0200
Committer: Marvin Humphrey <ma...@rectangular.com>
Committed: Wed Jun 18 21:23:00 2014 -0700
----------------------------------------------------------------------
core/Lucy/Index/SortFieldWriter.c | 53 +++++++++++++++++++++-------------
1 file changed, 33 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucy/blob/d96ef18e/core/Lucy/Index/SortFieldWriter.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/SortFieldWriter.c b/core/Lucy/Index/SortFieldWriter.c
index 0d07ce5..b6faec5 100644
--- a/core/Lucy/Index/SortFieldWriter.c
+++ b/core/Lucy/Index/SortFieldWriter.c
@@ -347,30 +347,43 @@ SortFieldWriter_Compare_IMP(SortFieldWriter *self, void *va, void *vb) {
return comparison;
}
-static int
-S_compare_doc_ids_by_ord_rev(void *context, const void *va, const void *vb) {
- SortCache *sort_cache = (SortCache*)context;
- int32_t a = *(int32_t*)va;
- int32_t b = *(int32_t*)vb;
- int32_t ord_a = SortCache_Ordinal(sort_cache, a);
- int32_t ord_b = SortCache_Ordinal(sort_cache, b);
- int32_t comparison = ord_a - ord_b;
- if (comparison == 0) { comparison = a - b; }
- return comparison;
-}
-
static void
S_lazy_init_sorted_ids(SortFieldWriter *self) {
SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);
- if (!ivars->sorted_ids) {
- ivars->sorted_ids
- = (int32_t*)MALLOCATE((ivars->run_max + 1) * sizeof(int32_t));
- for (int32_t i = 0, max = ivars->run_max; i <= max; i++) {
- ivars->sorted_ids[i] = i;
- }
- Sort_quicksort(ivars->sorted_ids + 1, ivars->run_max, sizeof(int32_t),
- S_compare_doc_ids_by_ord_rev, ivars->sort_cache);
+ if (ivars->sorted_ids) { return; }
+
+ // Counting sort. Could be optimized by working directly on the
+ // ordinal arrays.
+
+ SortCache *sort_cache = ivars->sort_cache;
+ int32_t run_cardinality = ivars->run_cardinality;
+ int32_t run_max = ivars->run_max;
+
+ // Count.
+ int32_t *counts = (int32_t*)CALLOCATE(run_cardinality, sizeof(int32_t));
+ for (int32_t doc_id = 0; doc_id <= run_max; ++doc_id) {
+ int32_t ord = SortCache_Ordinal(sort_cache, doc_id);
+ ++counts[ord];
+ }
+
+ // Compute partial sums.
+ int32_t sum = 0;
+ for (int32_t ord = 0; ord < run_cardinality; ++ord) {
+ int32_t count = counts[ord];
+ counts[ord] = sum;
+ sum += count;
}
+
+ // Distribute.
+ int32_t *sorted_ids = (int32_t*)MALLOCATE((run_max + 1) * sizeof(int32_t));
+ for (int32_t doc_id = 0; doc_id <= run_max; ++doc_id) {
+ int32_t ord = SortCache_Ordinal(sort_cache, doc_id);
+ int32_t pos = counts[ord]++;
+ sorted_ids[pos] = doc_id;
+ }
+
+ ivars->sorted_ids = sorted_ids;
+ FREEMEM(counts);
}
void