You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by ma...@apache.org on 2014/07/01 21:35:06 UTC
[1/8] git commit: refs/heads/master - Free sorted_ids in
SortFieldWriter a little earlier
Repository: lucy
Updated Branches:
refs/heads/master 4d500c56b -> eb10a8c68
Free sorted_ids in SortFieldWriter a little earlier
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/dd1a1985
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/dd1a1985
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/dd1a1985
Branch: refs/heads/master
Commit: dd1a19852c7cb191c91599587d03a26484c5a6fa
Parents: 22663de
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Thu Sep 26 02:28:04 2013 +0200
Committer: Marvin Humphrey <ma...@rectangular.com>
Committed: Wed Jun 18 21:22:59 2014 -0700
----------------------------------------------------------------------
core/Lucy/Index/SortFieldWriter.c | 2 ++
1 file changed, 2 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucy/blob/dd1a1985/core/Lucy/Index/SortFieldWriter.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/SortFieldWriter.c b/core/Lucy/Index/SortFieldWriter.c
index f34b68e..0d07ce5 100644
--- a/core/Lucy/Index/SortFieldWriter.c
+++ b/core/Lucy/Index/SortFieldWriter.c
@@ -471,6 +471,8 @@ SortFieldWriter_Refill_IMP(SortFieldWriter *self) {
if (ivars->run_tick > ivars->run_max) {
DECREF(ivars->sort_cache);
ivars->sort_cache = NULL;
+ FREEMEM(ivars->sorted_ids);
+ ivars->sorted_ids = NULL;
}
return count;
[7/8] git commit: refs/heads/master - Port t/015-sort_external.t to C
Posted by ma...@apache.org.
Port t/015-sort_external.t to C
Original commit by Nick Wellnhofer, updated by Marvin Humphrey for
changes to the SortExternal API.
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/73d6925e
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/73d6925e
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/73d6925e
Branch: refs/heads/master
Commit: 73d6925e8b6597a41939132feec7ebd2f8bf6f0c
Parents: be4e183
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Thu Oct 17 18:56:40 2013 +0200
Committer: Marvin Humphrey <ma...@rectangular.com>
Committed: Tue Jul 1 12:09:49 2014 -0700
----------------------------------------------------------------------
core/Lucy/Test.c | 2 +
core/Lucy/Test/Util/TestSortExternal.c | 287 ++++++++++++++++++++++++++
core/Lucy/Test/Util/TestSortExternal.cfh | 29 +++
core/Lucy/Util/BBSortEx.c | 20 ++
core/Lucy/Util/BBSortEx.cfh | 6 +
perl/buildlib/Lucy/Build/Binding/Util.pm | 9 -
perl/lib/Lucy/Util/BBSortEx.pm | 25 ---
perl/t/core/015-sort_external.t | 23 +++
8 files changed, 367 insertions(+), 34 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucy/blob/73d6925e/core/Lucy/Test.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Test.c b/core/Lucy/Test.c
index 62c9bdf..196ae6a 100644
--- a/core/Lucy/Test.c
+++ b/core/Lucy/Test.c
@@ -81,6 +81,7 @@
#include "Lucy/Test/Util/TestJson.h"
#include "Lucy/Test/Util/TestMemoryPool.h"
#include "Lucy/Test/Util/TestPriorityQueue.h"
+#include "Lucy/Test/Util/TestSortExternal.h"
TestSuite*
Test_create_test_suite() {
@@ -88,6 +89,7 @@ Test_create_test_suite() {
TestSuite_Add_Batch(suite, (TestBatch*)TestPriQ_new());
TestSuite_Add_Batch(suite, (TestBatch*)TestBitVector_new());
+ TestSuite_Add_Batch(suite, (TestBatch*)TestSortExternal_new());
TestSuite_Add_Batch(suite, (TestBatch*)TestMemPool_new());
TestSuite_Add_Batch(suite, (TestBatch*)TestIxFileNames_new());
TestSuite_Add_Batch(suite, (TestBatch*)TestJson_new());
http://git-wip-us.apache.org/repos/asf/lucy/blob/73d6925e/core/Lucy/Test/Util/TestSortExternal.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Test/Util/TestSortExternal.c b/core/Lucy/Test/Util/TestSortExternal.c
new file mode 100644
index 0000000..2e04c83
--- /dev/null
+++ b/core/Lucy/Test/Util/TestSortExternal.c
@@ -0,0 +1,287 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#define TESTLUCY_USE_SHORT_NAMES
+#include "Lucy/Util/ToolSet.h"
+#include "Lucy/Test/Util/TestSortExternal.h"
+
+#include "Clownfish/TestHarness/TestBatchRunner.h"
+#include "Lucy/Util/BBSortEx.h"
+#include "Lucy/Util/SortExternal.h"
+
+static ByteBuf *a_bb;
+static ByteBuf *b_bb;
+static ByteBuf *c_bb;
+static ByteBuf *d_bb;
+static ByteBuf *x_bb;
+static ByteBuf *y_bb;
+static ByteBuf *z_bb;
+
+TestSortExternal*
+TestSortExternal_new() {
+ return (TestSortExternal*)VTable_Make_Obj(TESTSORTEXTERNAL);
+}
+
+static void
+S_init_bytebufs() {
+ a_bb = BB_new_bytes("a", 1);
+ b_bb = BB_new_bytes("b", 1);
+ c_bb = BB_new_bytes("c", 1);
+ d_bb = BB_new_bytes("d", 1);
+ x_bb = BB_new_bytes("x", 1);
+ y_bb = BB_new_bytes("y", 1);
+ z_bb = BB_new_bytes("z", 1);
+}
+
+static void
+S_destroy_bytebufs() {
+ DECREF(a_bb);
+ DECREF(b_bb);
+ DECREF(c_bb);
+ DECREF(d_bb);
+ DECREF(x_bb);
+ DECREF(y_bb);
+ DECREF(z_bb);
+}
+
+static void
+test_bbsortex(TestBatchRunner *runner) {
+ BBSortEx *sortex = BBSortEx_new(4, NULL);
+
+ BBSortEx_Feed(sortex, INCREF(c_bb));
+ TEST_INT_EQ(runner, BBSortEx_Buffer_Count(sortex), 1,
+ "feed elem into cache");
+
+ BBSortEx_Feed(sortex, INCREF(b_bb));
+ BBSortEx_Feed(sortex, INCREF(d_bb));
+ BBSortEx_Sort_Buffer(sortex);
+
+ {
+ VArray *cache = BBSortEx_Peek_Cache(sortex);
+ VArray *wanted = VA_new(3);
+ VA_Push(wanted, INCREF(b_bb));
+ VA_Push(wanted, INCREF(c_bb));
+ VA_Push(wanted, INCREF(d_bb));
+ TEST_TRUE(runner, VA_Equals(cache, (Obj*)wanted), "sort cache");
+ DECREF(wanted);
+ DECREF(cache);
+ }
+
+ BBSortEx_Feed(sortex, INCREF(a_bb));
+ TEST_INT_EQ(runner, BBSortEx_Buffer_Count(sortex), 0,
+ "cache flushed automatically when mem_thresh crossed");
+ TEST_INT_EQ(runner, BBSortEx_Get_Num_Runs(sortex), 1, "run added");
+
+ VArray *external = VA_new(3);
+ VA_Push(external, INCREF(x_bb));
+ VA_Push(external, INCREF(y_bb));
+ VA_Push(external, INCREF(z_bb));
+ BBSortEx *run = BBSortEx_new(0x1000000, external);
+ BBSortEx_Add_Run(sortex, (SortExternal*)run);
+ BBSortEx_Flip(sortex);
+
+ {
+ VArray *got = VA_new(7);
+ Obj *object;
+ while (NULL != (object = BBSortEx_Fetch(sortex))) {
+ VA_Push(got, object);
+ }
+
+ VArray *wanted = VA_new(7);
+ VA_Push(wanted, INCREF(a_bb));
+ VA_Push(wanted, INCREF(b_bb));
+ VA_Push(wanted, INCREF(c_bb));
+ VA_Push(wanted, INCREF(d_bb));
+ VA_Push(wanted, INCREF(x_bb));
+ VA_Push(wanted, INCREF(y_bb));
+ VA_Push(wanted, INCREF(z_bb));
+
+ TEST_TRUE(runner, VA_Equals(got, (Obj*)wanted), "Add_Run");
+
+ DECREF(wanted);
+ DECREF(got);
+ }
+
+ DECREF(external);
+ DECREF(sortex);
+}
+
+static void
+test_clear_buffer(TestBatchRunner *runner) {
+ BBSortEx *sortex = BBSortEx_new(4, NULL);
+
+ BBSortEx_Feed(sortex, INCREF(c_bb));
+ BBSortEx_Clear_Buffer(sortex);
+ TEST_INT_EQ(runner, BBSortEx_Buffer_Count(sortex), 0, "Clear_Buffer");
+
+ BBSortEx_Feed(sortex, INCREF(b_bb));
+ BBSortEx_Feed(sortex, INCREF(a_bb));
+ BBSortEx_Flush(sortex);
+ BBSortEx_Flip(sortex);
+ Obj *object = BBSortEx_Peek(sortex);
+ TEST_TRUE(runner, BB_Equals(a_bb, object), "Peek");
+
+ VArray *got = VA_new(2);
+ while (NULL != (object = BBSortEx_Fetch(sortex))) {
+ VA_Push(got, object);
+ }
+ VArray *wanted = VA_new(2);
+ VA_Push(wanted, INCREF(a_bb));
+ VA_Push(wanted, INCREF(b_bb));
+ TEST_TRUE(runner, VA_Equals(got, (Obj*)wanted),
+ "elements cleared via Clear_Buffer truly cleared");
+
+ DECREF(wanted);
+ DECREF(got);
+ DECREF(sortex);
+}
+
+static void
+S_test_sort(TestBatchRunner *runner, VArray *bytebufs, uint32_t mem_thresh,
+ const char *test_name) {
+ uint32_t size = VA_Get_Size(bytebufs);
+ BBSortEx *sortex = BBSortEx_new(mem_thresh, NULL);
+ ByteBuf **shuffled = (ByteBuf**)MALLOCATE(size * sizeof(ByteBuf*));
+
+ for (int i = 0; i < size; ++i) {
+ shuffled[i] = (ByteBuf*)CERTIFY(VA_Fetch(bytebufs, i), BYTEBUF);
+ }
+ for (int i = size - 1; i > 0; --i) {
+ int shuffle_pos = rand() % (i + 1);
+ ByteBuf *temp = shuffled[shuffle_pos];
+ shuffled[shuffle_pos] = shuffled[i];
+ shuffled[i] = temp;
+ }
+ for (int i = 0; i < size; ++i) {
+ BBSortEx_Feed(sortex, INCREF(shuffled[i]));
+ }
+
+ BBSortEx_Flip(sortex);
+ VArray *got = VA_new(size);
+ Obj *object;
+ while (NULL != (object = BBSortEx_Fetch(sortex))) {
+ VA_Push(got, object);
+ }
+ TEST_TRUE(runner, VA_Equals(got, (Obj*)bytebufs), test_name);
+
+ FREEMEM(shuffled);
+ DECREF(got);
+ DECREF(sortex);
+}
+
+static void
+S_test_sort_letters(TestBatchRunner *runner, const char *letters,
+ uint32_t mem_thresh, const char *test_name) {
+ size_t num_letters = strlen(letters);
+ VArray *bytebufs = VA_new(num_letters);
+
+ for (int i = 0; i < num_letters; ++i) {
+ char ch = letters[i];
+ size_t size = ch == '_' ? 0 : 1;
+ ByteBuf *bytebuf = BB_new_bytes(&ch, size);
+ VA_Push(bytebufs, (Obj*)bytebuf);
+ }
+
+ S_test_sort(runner, bytebufs, mem_thresh, test_name);
+
+ DECREF(bytebufs);
+}
+
+static void
+test_sort_letters(TestBatchRunner *runner) {
+ S_test_sort_letters(runner, "abcdefghijklmnopqrstuvwxyz", 0x1000000,
+ "sort letters");
+ S_test_sort_letters(runner, "aaabcdxxxxxxyy", 0x1000000,
+ "sort repeated letters");
+ S_test_sort_letters(runner, "__abcdefghijklmnopqrstuvwxyz", 0x1000000,
+ "sort letters and empty strings");
+ S_test_sort_letters(runner, "abcdefghijklmnopqrstuvwxyz", 30,
+ "... with an absurdly low mem_thresh");
+ S_test_sort_letters(runner, "abcdefghijklmnopqrstuvwxyz", 1,
+ "... with an even lower mem_thresh");
+}
+
+static void
+test_sort_nothing(TestBatchRunner *runner) {
+ BBSortEx *sortex = BBSortEx_new(0x1000000, NULL);
+ BBSortEx_Flip(sortex);
+ TEST_TRUE(runner, BBSortEx_Fetch(sortex) == NULL,
+ "Sorting nothing returns undef");
+ DECREF(sortex);
+}
+
+static void
+test_sort_packed_ints(TestBatchRunner *runner) {
+ size_t num_ints = 11001;
+ VArray *bytebufs = VA_new(num_ints);
+
+ for (uint32_t i = 0; i < num_ints; ++i) {
+ char buf[4];
+ buf[0] = i >> 24;
+ buf[1] = i >> 16;
+ buf[2] = i >> 8;
+ buf[3] = i;
+ ByteBuf *bytebuf = BB_new_bytes(&buf, 4);
+ VA_Push(bytebufs, (Obj*)bytebuf);
+ }
+
+ S_test_sort(runner, bytebufs, 5000, "Sorting packed integers...");
+
+ DECREF(bytebufs);
+}
+
+static void
+test_sort_random_strings(TestBatchRunner *runner) {
+ size_t num_strings = 1001;
+ VArray *bytebufs = VA_new(num_strings);
+
+ for (uint32_t i = 0; i < num_strings; ++i) {
+ char buf[1201];
+ int size = rand() % 1200 + 1;
+ for (int i = 0; i < size; ++i) {
+ buf[i] = rand();
+ }
+ ByteBuf *bytebuf = BB_new_bytes(&buf, size);
+ VA_Push(bytebufs, (Obj*)bytebuf);
+ }
+
+ VA_Sort(bytebufs, NULL, NULL);
+ S_test_sort(runner, bytebufs, 15000,
+ "Random binary strings of random length");
+
+ DECREF(bytebufs);
+}
+
+void
+TestSortExternal_Run_IMP(TestSortExternal *self, TestBatchRunner *runner) {
+ TestBatchRunner_Plan(runner, (TestBatch*)self, 16);
+
+ srand((unsigned int)time((time_t*)NULL));
+ S_init_bytebufs();
+ test_bbsortex(runner);
+ test_clear_buffer(runner);
+ test_sort_letters(runner);
+ test_sort_nothing(runner);
+ test_sort_packed_ints(runner);
+ test_sort_random_strings(runner);
+ S_destroy_bytebufs();
+}
+
+
http://git-wip-us.apache.org/repos/asf/lucy/blob/73d6925e/core/Lucy/Test/Util/TestSortExternal.cfh
----------------------------------------------------------------------
diff --git a/core/Lucy/Test/Util/TestSortExternal.cfh b/core/Lucy/Test/Util/TestSortExternal.cfh
new file mode 100644
index 0000000..028b322
--- /dev/null
+++ b/core/Lucy/Test/Util/TestSortExternal.cfh
@@ -0,0 +1,29 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+parcel TestLucy;
+
+class Lucy::Test::Util::TestSortExternal
+ inherits Clownfish::TestHarness::TestBatch {
+
+ inert incremented TestSortExternal*
+ new();
+
+ void
+ Run(TestSortExternal *self, TestBatchRunner *runner);
+}
+
+
http://git-wip-us.apache.org/repos/asf/lucy/blob/73d6925e/core/Lucy/Util/BBSortEx.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Util/BBSortEx.c b/core/Lucy/Util/BBSortEx.c
index ff96c73..c9201bb 100644
--- a/core/Lucy/Util/BBSortEx.c
+++ b/core/Lucy/Util/BBSortEx.c
@@ -169,4 +169,24 @@ BBSortEx_Compare_IMP(BBSortEx *self, void *va, void *vb) {
return BB_compare((ByteBuf**)va, (ByteBuf**)vb);
}
+VArray*
+BBSortEx_Peek_Cache_IMP(BBSortEx *self) {
+ BBSortExIVARS *const ivars = BBSortEx_IVARS(self);
+ uint32_t count = ivars->buf_max - ivars->buf_tick;
+ Obj **buffer = ivars->buffer;
+ VArray *retval = VA_new(count);
+
+ for (uint32_t i = ivars->buf_tick; i < ivars->buf_max; ++i) {
+ VA_Push(retval, INCREF(buffer[i]));
+ }
+
+ return retval;
+}
+
+uint32_t
+BBSortEx_Get_Num_Runs_IMP(BBSortEx *self) {
+ BBSortExIVARS *const ivars = BBSortEx_IVARS(self);
+ return VA_Get_Size(ivars->runs);
+}
+
http://git-wip-us.apache.org/repos/asf/lucy/blob/73d6925e/core/Lucy/Util/BBSortEx.cfh
----------------------------------------------------------------------
diff --git a/core/Lucy/Util/BBSortEx.cfh b/core/Lucy/Util/BBSortEx.cfh
index 8ef64c8..6fd8656 100644
--- a/core/Lucy/Util/BBSortEx.cfh
+++ b/core/Lucy/Util/BBSortEx.cfh
@@ -51,6 +51,12 @@ class Lucy::Util::BBSortEx
int
Compare(BBSortEx *self, void *va, void *vb);
+ incremented VArray*
+ Peek_Cache(BBSortEx *self);
+
+ uint32_t
+ Get_Num_Runs(BBSortEx *self);
+
public void
Destroy(BBSortEx *self);
}
http://git-wip-us.apache.org/repos/asf/lucy/blob/73d6925e/perl/buildlib/Lucy/Build/Binding/Util.pm
----------------------------------------------------------------------
diff --git a/perl/buildlib/Lucy/Build/Binding/Util.pm b/perl/buildlib/Lucy/Build/Binding/Util.pm
index 4ef7dbe..1438011 100644
--- a/perl/buildlib/Lucy/Build/Binding/Util.pm
+++ b/perl/buildlib/Lucy/Build/Binding/Util.pm
@@ -21,21 +21,12 @@ $VERSION = eval $VERSION;
sub bind_all {
my $class = shift;
- $class->bind_bbsortex;
$class->bind_debug;
$class->bind_freezer;
$class->bind_indexfilenames;
$class->bind_sortexternal;
}
-sub bind_bbsortex {
- my $binding = Clownfish::CFC::Binding::Perl::Class->new(
- parcel => "Lucy",
- class_name => "Lucy::Util::BBSortEx",
- );
- Clownfish::CFC::Binding::Perl::Class->register($binding);
-}
-
sub bind_debug {
my $xs_code = <<'END_XS_CODE';
MODULE = Lucy PACKAGE = Lucy::Util::Debug
http://git-wip-us.apache.org/repos/asf/lucy/blob/73d6925e/perl/lib/Lucy/Util/BBSortEx.pm
----------------------------------------------------------------------
diff --git a/perl/lib/Lucy/Util/BBSortEx.pm b/perl/lib/Lucy/Util/BBSortEx.pm
deleted file mode 100644
index cbba688..0000000
--- a/perl/lib/Lucy/Util/BBSortEx.pm
+++ /dev/null
@@ -1,25 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-package Lucy::Util::BBSortEx;
-use Lucy;
-our $VERSION = '0.003000';
-$VERSION = eval $VERSION;
-
-1;
-
-__END__
-
-
http://git-wip-us.apache.org/repos/asf/lucy/blob/73d6925e/perl/t/core/015-sort_external.t
----------------------------------------------------------------------
diff --git a/perl/t/core/015-sort_external.t b/perl/t/core/015-sort_external.t
new file mode 100644
index 0000000..6d966bb
--- /dev/null
+++ b/perl/t/core/015-sort_external.t
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use strict;
+use warnings;
+
+use Lucy::Test;
+my $success = Lucy::Test::run_tests("Lucy::Test::Util::TestSortExternal");
+
+exit($success ? 0 : 1);
+
[4/8] git commit: refs/heads/master - Don't sort documents twice in
SortFieldWriter#Refill
Posted by ma...@apache.org.
Don't sort documents twice in SortFieldWriter#Refill
The doc_ids are already sorted in S_lazy_init_sorted_ids. We only have
to make sure that S_lazy_init_sorted_ids uses the doc_id as secondary
sort key.
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/efbc9ace
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/efbc9ace
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/efbc9ace
Branch: refs/heads/master
Commit: efbc9acef7a97463b9d98404e6add8362fcf5e4c
Parents: 4d500c5
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Thu Sep 26 01:25:52 2013 +0200
Committer: Marvin Humphrey <ma...@rectangular.com>
Committed: Wed Jun 18 21:22:59 2014 -0700
----------------------------------------------------------------------
core/Lucy/Index/SortFieldWriter.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucy/blob/efbc9ace/core/Lucy/Index/SortFieldWriter.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/SortFieldWriter.c b/core/Lucy/Index/SortFieldWriter.c
index 381f9ad..9fe71c0 100644
--- a/core/Lucy/Index/SortFieldWriter.c
+++ b/core/Lucy/Index/SortFieldWriter.c
@@ -345,7 +345,7 @@ SortFieldWriter_Compare_IMP(SortFieldWriter *self, void *va, void *vb) {
SFWriterElemIVARS *b = SFWriterElem_IVARS(*(SFWriterElem**)vb);
int32_t comparison
= FType_null_back_compare_values(ivars->type, a->value, b->value);
- if (comparison == 0) { comparison = b->doc_id - a->doc_id; }
+ if (comparison == 0) { comparison = a->doc_id - b->doc_id; }
return comparison;
}
@@ -356,7 +356,9 @@ S_compare_doc_ids_by_ord_rev(void *context, const void *va, const void *vb) {
int32_t b = *(int32_t*)vb;
int32_t ord_a = SortCache_Ordinal(sort_cache, a);
int32_t ord_b = SortCache_Ordinal(sort_cache, b);
- return ord_a - ord_b;
+ int32_t comparison = ord_a - ord_b;
+ if (comparison == 0) { comparison = a - b; }
+ return comparison;
}
static void
@@ -480,7 +482,6 @@ SortFieldWriter_Refill_IMP(SortFieldWriter *self) {
ivars->run_tick++;
}
ivars->run_ord++;
- SortFieldWriter_Sort_Buffer(self);
if (ivars->run_ord >= ivars->run_cardinality) {
DECREF(ivars->sort_cache);
[3/8] git commit: refs/heads/master - Make SortFieldWriter#Refill
obey the memory limit
Posted by ma...@apache.org.
Make SortFieldWriter#Refill obey the memory limit
The old logic was broken.
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/0cf86d1d
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/0cf86d1d
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/0cf86d1d
Branch: refs/heads/master
Commit: 0cf86d1d79cc91723762f6abd34c034a25cb285b
Parents: efbc9ac
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Thu Sep 26 02:04:42 2013 +0200
Committer: Marvin Humphrey <ma...@rectangular.com>
Committed: Wed Jun 18 21:22:59 2014 -0700
----------------------------------------------------------------------
core/Lucy/Index/SortFieldWriter.c | 20 +++-----------------
core/Lucy/Index/SortFieldWriter.cfh | 1 -
2 files changed, 3 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucy/blob/0cf86d1d/core/Lucy/Index/SortFieldWriter.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/SortFieldWriter.c b/core/Lucy/Index/SortFieldWriter.c
index 9fe71c0..8a372a3 100644
--- a/core/Lucy/Index/SortFieldWriter.c
+++ b/core/Lucy/Index/SortFieldWriter.c
@@ -92,7 +92,6 @@ SortFieldWriter_init(SortFieldWriter *self, Schema *schema,
ivars->sort_cache = NULL;
ivars->doc_map = NULL;
ivars->sorted_ids = NULL;
- ivars->run_ord = 0;
ivars->run_tick = 0;
ivars->ord_width = 0;
ivars->last_val = NULL;
@@ -450,19 +449,10 @@ SortFieldWriter_Refill_IMP(SortFieldWriter *self) {
I32Array *const doc_map = ivars->doc_map;
SortCache *const sort_cache = ivars->sort_cache;
- while (ivars->run_ord < ivars->run_cardinality
+ uint32_t count = 0;
+ while (ivars->run_tick <= ivars->run_max
&& MemPool_Get_Consumed(ivars->mem_pool) < ivars->mem_thresh
) {
- Obj *val = SortCache_Value(sort_cache, ivars->run_ord);
- if (val) {
- Hash_Store(uniq_vals, val, (Obj*)CFISH_TRUE);
- DECREF(val);
- break;
- }
- ivars->run_ord++;
- }
- uint32_t count = 0;
- while (ivars->run_tick <= ivars->run_max) {
int32_t raw_doc_id = ivars->sorted_ids[ivars->run_tick];
int32_t ord = SortCache_Ordinal(sort_cache, raw_doc_id);
if (ord != null_ord) {
@@ -476,14 +466,10 @@ SortFieldWriter_Refill_IMP(SortFieldWriter *self) {
DECREF(val);
}
}
- else if (ord > ivars->run_ord) {
- break;
- }
ivars->run_tick++;
}
- ivars->run_ord++;
- if (ivars->run_ord >= ivars->run_cardinality) {
+ if (ivars->run_tick > ivars->run_max) {
DECREF(ivars->sort_cache);
ivars->sort_cache = NULL;
}
http://git-wip-us.apache.org/repos/asf/lucy/blob/0cf86d1d/core/Lucy/Index/SortFieldWriter.cfh
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/SortFieldWriter.cfh b/core/Lucy/Index/SortFieldWriter.cfh
index 8f749cc..0128c61 100644
--- a/core/Lucy/Index/SortFieldWriter.cfh
+++ b/core/Lucy/Index/SortFieldWriter.cfh
@@ -48,7 +48,6 @@ class Lucy::Index::SortFieldWriter
int32_t run_max;
bool var_width;
int32_t *sorted_ids;
- int32_t run_ord;
int32_t run_tick;
int32_t ord_width;
Obj *last_val;
[6/8] git commit: refs/heads/master - Convert t/221-sort_writer.t to C
Posted by ma...@apache.org.
Convert t/221-sort_writer.t to C
Original commit by Nick Wellnhofer, amended by Marvin Humphrey to
replace "cnick" with "nickname".
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/be4e1833
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/be4e1833
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/be4e1833
Branch: refs/heads/master
Commit: be4e1833868fd8cca5cba9883d1bcd1cab32f2b4
Parents: d96ef18
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Sat Sep 28 15:56:26 2013 +0200
Committer: Marvin Humphrey <ma...@rectangular.com>
Committed: Tue Jul 1 12:09:44 2014 -0700
----------------------------------------------------------------------
core/Lucy/Test.c | 2 +
core/Lucy/Test/Index/TestSortWriter.c | 315 +++++++++++++++++++++++++++
core/Lucy/Test/Index/TestSortWriter.cfh | 43 ++++
perl/t/221-sort_writer.t | 174 ---------------
perl/t/core/224-sort_writer.t | 23 ++
5 files changed, 383 insertions(+), 174 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucy/blob/be4e1833/core/Lucy/Test.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Test.c b/core/Lucy/Test.c
index 5923e98..62c9bdf 100644
--- a/core/Lucy/Test.c
+++ b/core/Lucy/Test.c
@@ -40,6 +40,7 @@
#include "Lucy/Test/Index/TestSegWriter.h"
#include "Lucy/Test/Index/TestSegment.h"
#include "Lucy/Test/Index/TestSnapshot.h"
+#include "Lucy/Test/Index/TestSortWriter.h"
#include "Lucy/Test/Index/TestTermInfo.h"
#include "Lucy/Test/Object/TestBitVector.h"
#include "Lucy/Test/Object/TestI32Array.h"
@@ -121,6 +122,7 @@ Test_create_test_suite() {
TestSuite_Add_Batch(suite, (TestBatch*)TestHLWriter_new());
TestSuite_Add_Batch(suite, (TestBatch*)TestPListWriter_new());
TestSuite_Add_Batch(suite, (TestBatch*)TestSegWriter_new());
+ TestSuite_Add_Batch(suite, (TestBatch*)TestSortWriter_new());
TestSuite_Add_Batch(suite, (TestBatch*)TestPolyReader_new());
TestSuite_Add_Batch(suite, (TestBatch*)TestFullTextType_new());
TestSuite_Add_Batch(suite, (TestBatch*)TestBlobType_new());
http://git-wip-us.apache.org/repos/asf/lucy/blob/be4e1833/core/Lucy/Test/Index/TestSortWriter.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Test/Index/TestSortWriter.c b/core/Lucy/Test/Index/TestSortWriter.c
new file mode 100644
index 0000000..761572f
--- /dev/null
+++ b/core/Lucy/Test/Index/TestSortWriter.c
@@ -0,0 +1,315 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define TESTLUCY_USE_SHORT_NAMES
+#include "Lucy/Util/ToolSet.h"
+
+#include "Lucy/Test/Index/TestSortWriter.h"
+
+#include "Clownfish/TestHarness/TestBatchRunner.h"
+#include "Lucy/Analysis/StandardTokenizer.h"
+#include "Lucy/Document/Doc.h"
+#include "Lucy/Document/HitDoc.h"
+#include "Lucy/Index/DocReader.h"
+#include "Lucy/Index/Indexer.h"
+#include "Lucy/Index/IndexManager.h"
+#include "Lucy/Index/PolyReader.h"
+#include "Lucy/Index/Segment.h"
+#include "Lucy/Index/SegReader.h"
+#include "Lucy/Index/SortCache.h"
+#include "Lucy/Index/SortReader.h"
+#include "Lucy/Index/SortWriter.h"
+#include "Lucy/Plan/FullTextType.h"
+#include "Lucy/Plan/Schema.h"
+#include "Lucy/Plan/StringType.h"
+#include "Lucy/Store/RAMFolder.h"
+
+static String *name_str;
+static String *speed_str;
+static String *weight_str;
+static String *home_str;
+static String *cat_str;
+static String *wheels_str;
+static String *unused_str;
+static String *nope_str;
+
+TestSortWriter*
+TestSortWriter_new() {
+ return (TestSortWriter*)VTable_Make_Obj(TESTSORTWRITER);
+}
+
+static void
+S_init_strings() {
+ name_str = Str_newf("name");
+ speed_str = Str_newf("speed");
+ weight_str = Str_newf("weight");
+ home_str = Str_newf("home");
+ cat_str = Str_newf("cat");
+ wheels_str = Str_newf("wheels");
+ unused_str = Str_newf("unused");
+ nope_str = Str_newf("nope");
+}
+
+static void
+S_destroy_strings() {
+ DECREF(name_str);
+ DECREF(speed_str);
+ DECREF(weight_str);
+ DECREF(home_str);
+ DECREF(cat_str);
+ DECREF(wheels_str);
+ DECREF(unused_str);
+ DECREF(nope_str);
+}
+
+static Schema*
+S_create_schema() {
+ Schema *schema = Schema_new();
+
+ StandardTokenizer *tokenizer = StandardTokenizer_new();
+ FullTextType *full_text_type = FullTextType_new((Analyzer*)tokenizer);
+ FullTextType_Set_Sortable(full_text_type, true);
+
+ StringType *string_type = StringType_new();
+ StringType_Set_Sortable(string_type, true);
+
+ StringType *unsortable = StringType_new();
+
+ Schema_Spec_Field(schema, name_str, (FieldType*)full_text_type);
+ Schema_Spec_Field(schema, speed_str, (FieldType*)string_type);
+ Schema_Spec_Field(schema, weight_str, (FieldType*)string_type);
+ Schema_Spec_Field(schema, home_str, (FieldType*)string_type);
+ Schema_Spec_Field(schema, cat_str, (FieldType*)string_type);
+ Schema_Spec_Field(schema, wheels_str, (FieldType*)string_type);
+ Schema_Spec_Field(schema, unused_str, (FieldType*)string_type);
+ Schema_Spec_Field(schema, nope_str, (FieldType*)unsortable);
+
+ DECREF(unsortable);
+ DECREF(string_type);
+ DECREF(full_text_type);
+ DECREF(tokenizer);
+
+ return schema;
+}
+
+static void
+S_store_field(Doc *doc, String *field, const char *value) {
+ if (value) {
+ StackString *string = SSTR_WRAP_UTF8(value, strlen(value));
+ Doc_Store(doc, field, (Obj*)string);
+ }
+}
+
+static void
+S_add_doc(Indexer *indexer, const char *name, const char *speed,
+ const char *weight, const char *home, const char *wheels,
+ const char *nope) {
+ Doc *doc = Doc_new(NULL, 0);
+
+ S_store_field(doc, name_str, name);
+ S_store_field(doc, speed_str, speed);
+ S_store_field(doc, weight_str, weight);
+ S_store_field(doc, home_str, home);
+ S_store_field(doc, cat_str, "vehicle");
+ S_store_field(doc, wheels_str, wheels);
+ S_store_field(doc, nope_str, nope);
+
+ Indexer_Add_Doc(indexer, doc, 1.0f);
+
+ DECREF(doc);
+}
+
+static void
+S_test_sort_cache(TestBatchRunner *runner, RAMFolder *folder,
+ SegReader *seg_reader, const char *gen, bool is_used,
+ String *field) {
+ Segment *segment = SegReader_Get_Segment(seg_reader);
+ int32_t field_num = Seg_Field_Num(segment, field);
+ String *filename = Str_newf("seg_%s/sort-%i32.ord", gen, field_num);
+ if (is_used) {
+ TEST_TRUE(runner, RAMFolder_Exists(folder, filename),
+ "sort files written for %s", Str_Get_Ptr8(field));
+ }
+ else {
+ TEST_TRUE(runner, !RAMFolder_Exists(folder, filename),
+ "no sort files written for %s", Str_Get_Ptr8(field));
+ }
+ DECREF(filename);
+
+ if (!is_used) { return; }
+
+ SortReader *sort_reader
+ = (SortReader*)SegReader_Obtain(seg_reader,
+ VTable_Get_Name(SORTREADER));
+ DocReader *doc_reader
+ = (DocReader*)SegReader_Obtain(seg_reader, VTable_Get_Name(DOCREADER));
+ SortCache *sort_cache
+ = SortReader_Fetch_Sort_Cache(sort_reader, field);
+
+ int32_t doc_max = SegReader_Doc_Max(seg_reader);
+ for (int32_t doc_id = 1; doc_id <= doc_max; ++doc_id) {
+ int32_t ord = SortCache_Ordinal(sort_cache, doc_id);
+ Obj *cache_value = SortCache_Value(sort_cache, ord);
+ HitDoc *doc = DocReader_Fetch_Doc(doc_reader, doc_id);
+ Obj *doc_value = HitDoc_Extract(doc, field);
+
+ bool is_equal;
+ if (cache_value == NULL || doc_value == NULL) {
+ is_equal = (cache_value == doc_value);
+ }
+ else {
+ is_equal = Obj_Equals(cache_value, doc_value);
+ }
+ TEST_TRUE(runner, is_equal, "correct cached value field %s doc %d",
+ Str_Get_Ptr8(field), doc_id);
+
+ DECREF(doc_value);
+ DECREF(doc);
+ DECREF(cache_value);
+ }
+}
+
+static void
+test_sort_writer(TestBatchRunner *runner) {
+ Schema *schema = S_create_schema();
+ RAMFolder *folder = RAMFolder_new(NULL);
+
+ {
+ // Add vehicles.
+ Indexer *indexer = Indexer_new(schema, (Obj*)folder, NULL, 0);
+
+ S_add_doc(indexer, "airplane", "0200", "8000", "air", "3", "nyet");
+ S_add_doc(indexer, "bike", "0015", "0025", "land", "2", NULL);
+ S_add_doc(indexer, "car", "0070", "3000", "land", "4", NULL);
+
+ Indexer_Commit(indexer);
+ DECREF(indexer);
+ }
+
+ {
+ PolyReader *poly_reader = PolyReader_open((Obj*)folder, NULL, NULL);
+ VArray *seg_readers = PolyReader_Get_Seg_Readers(poly_reader);
+ SegReader *seg_reader = (SegReader*)VA_Fetch(seg_readers, 0);
+
+ S_test_sort_cache(runner, folder, seg_reader, "1", true, name_str);
+ S_test_sort_cache(runner, folder, seg_reader, "1", true, speed_str);
+ S_test_sort_cache(runner, folder, seg_reader, "1", true, weight_str);
+ S_test_sort_cache(runner, folder, seg_reader, "1", true, home_str);
+ S_test_sort_cache(runner, folder, seg_reader, "1", true, cat_str);
+ S_test_sort_cache(runner, folder, seg_reader, "1", true, wheels_str);
+ S_test_sort_cache(runner, folder, seg_reader, "1", false, unused_str);
+ S_test_sort_cache(runner, folder, seg_reader, "1", false, nope_str);
+
+ DECREF(poly_reader);
+ }
+
+ {
+ // Add a second segment.
+ NonMergingIndexManager *manager = NMIxManager_new();
+ Indexer *indexer
+ = Indexer_new(schema, (Obj*)folder, (IndexManager*)manager, 0);
+ // no "wheels" field -- test NULL/undef
+ S_add_doc(indexer, "dirigible", "0040", "0000", "air", NULL, NULL);
+ Indexer_Commit(indexer);
+ DECREF(indexer);
+ DECREF(manager);
+ }
+
+ {
+ // Consolidate everything, to test merging.
+ Indexer *indexer = Indexer_new(schema, (Obj*)folder, NULL, 0);
+ StackString *bike_str = SSTR_WRAP_UTF8("bike", 4);
+ Indexer_Delete_By_Term(indexer, name_str, (Obj*)bike_str);
+ // no "wheels" field -- test NULL/undef
+ S_add_doc(indexer, "elephant", "0020", "6000", "land", NULL, NULL);
+ Indexer_Optimize(indexer);
+ Indexer_Commit(indexer);
+ DECREF(indexer);
+ }
+
+ {
+ VArray *filenames = RAMFolder_List_R(folder, NULL);
+ int num_old_seg_files = 0;
+ for (uint32_t i = 0, size = VA_Get_Size(filenames); i < size; ++i) {
+ String *filename = (String*)VA_Fetch(filenames, i);
+ if (Str_Find_Utf8(filename, "seg_1", 5) >= 0
+ || Str_Find_Utf8(filename, "seg_2", 5) >= 0
+ ) {
+ ++num_old_seg_files;
+ }
+ }
+ TEST_INT_EQ(runner, num_old_seg_files, 0,
+ "all files from earlier segments zapped");
+ DECREF(filenames);
+ }
+
+ {
+ PolyReader *poly_reader = PolyReader_open((Obj*)folder, NULL, NULL);
+ VArray *seg_readers = PolyReader_Get_Seg_Readers(poly_reader);
+ SegReader *seg_reader = (SegReader*)VA_Fetch(seg_readers, 0);
+
+ S_test_sort_cache(runner, folder, seg_reader, "3", true, name_str);
+ S_test_sort_cache(runner, folder, seg_reader, "3", true, speed_str);
+ S_test_sort_cache(runner, folder, seg_reader, "3", true, weight_str);
+ S_test_sort_cache(runner, folder, seg_reader, "3", true, home_str);
+ S_test_sort_cache(runner, folder, seg_reader, "3", true, cat_str);
+ S_test_sort_cache(runner, folder, seg_reader, "3", true, wheels_str);
+
+ DECREF(poly_reader);
+ }
+
+ DECREF(folder);
+ DECREF(schema);
+}
+
+void
+TestSortWriter_Run_IMP(TestSortWriter *self, TestBatchRunner *runner) {
+ TestBatchRunner_Plan(runner, (TestBatch*)self, 57);
+
+ // Force frequent flushes.
+ SortWriter_set_default_mem_thresh(100);
+
+ S_init_strings();
+ test_sort_writer(runner);
+ S_destroy_strings();
+}
+
+NonMergingIndexManager*
+NMIxManager_new() {
+ NonMergingIndexManager *self
+ = (NonMergingIndexManager*)VTable_Make_Obj(NONMERGINGINDEXMANAGER);
+ return NMIxManager_init(self);
+}
+
+NonMergingIndexManager*
+NMIxManager_init(NonMergingIndexManager *self) {
+ IxManager_init((IndexManager*)self, NULL, NULL);
+ return self;
+}
+
+VArray*
+NMIxManager_Recycle_IMP(NonMergingIndexManager *self, PolyReader *reader,
+ lucy_DeletionsWriter *del_writer, int64_t cutoff,
+ bool optimize) {
+ UNUSED_VAR(self);
+ UNUSED_VAR(reader);
+ UNUSED_VAR(del_writer);
+ UNUSED_VAR(cutoff);
+ UNUSED_VAR(optimize);
+ return VA_new(0);
+}
+
+
http://git-wip-us.apache.org/repos/asf/lucy/blob/be4e1833/core/Lucy/Test/Index/TestSortWriter.cfh
----------------------------------------------------------------------
diff --git a/core/Lucy/Test/Index/TestSortWriter.cfh b/core/Lucy/Test/Index/TestSortWriter.cfh
new file mode 100644
index 0000000..98e3ec6
--- /dev/null
+++ b/core/Lucy/Test/Index/TestSortWriter.cfh
@@ -0,0 +1,43 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+parcel TestLucy;
+
+class Lucy::Test::Index::TestSortWriter
+ inherits Clownfish::TestHarness::TestBatch {
+
+ inert incremented TestSortWriter*
+ new();
+
+ void
+ Run(TestSortWriter *self, TestBatchRunner *runner);
+}
+
+class Lucy::Test::Index::NonMergingIndexManager nickname NMIxManager
+ inherits Lucy::Index::IndexManager {
+
+ public inert incremented NonMergingIndexManager*
+ new();
+
+ public inert NonMergingIndexManager*
+ init(NonMergingIndexManager *self);
+
+ public incremented VArray*
+ Recycle(NonMergingIndexManager *self, PolyReader *reader,
+ DeletionsWriter *del_writer, int64_t cutoff,
+ bool optimize = false);
+}
+
http://git-wip-us.apache.org/repos/asf/lucy/blob/be4e1833/perl/t/221-sort_writer.t
----------------------------------------------------------------------
diff --git a/perl/t/221-sort_writer.t b/perl/t/221-sort_writer.t
deleted file mode 100644
index 45fb341..0000000
--- a/perl/t/221-sort_writer.t
+++ /dev/null
@@ -1,174 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-use strict;
-use warnings;
-use lib 'buildlib';
-
-package NonMergingIndexManager;
-use base qw( Lucy::Index::IndexManager );
-
-sub recycle {
- return Clownfish::VArray->new;
-}
-
-package SortSchema;
-use base qw( Lucy::Plan::Schema );
-
-sub new {
- my $self = shift->SUPER::new(@_);
- my $fulltext_type = Lucy::Plan::FullTextType->new(
- analyzer => Lucy::Analysis::StandardTokenizer->new,
- sortable => 1,
- );
- my $string_type = Lucy::Plan::StringType->new( sortable => 1 );
- my $unsortable = Lucy::Plan::StringType->new;
- $self->spec_field( name => 'name', type => $fulltext_type );
- $self->spec_field( name => 'speed', type => $string_type );
- $self->spec_field( name => 'weight', type => $string_type );
- $self->spec_field( name => 'home', type => $string_type );
- $self->spec_field( name => 'cat', type => $string_type );
- $self->spec_field( name => 'wheels', type => $string_type );
- $self->spec_field( name => 'unused', type => $string_type );
- $self->spec_field( name => 'nope', type => $unsortable );
- return $self;
-}
-
-package main;
-use Lucy::Test;
-use Test::More tests => 57;
-
-# Force frequent flushes.
-Lucy::Index::SortWriter::set_default_mem_thresh(100);
-
-my $airplane = {
- name => 'airplane',
- speed => '0200',
- weight => '8000',
- home => 'air',
- cat => 'vehicle',
- wheels => 3,
- nope => 'nyet',
-};
-my $bike = {
- name => 'bike',
- speed => '0015',
- weight => '0025',
- home => 'land',
- cat => 'vehicle',
- wheels => 2,
-};
-my $car = {
- name => 'car',
- speed => '0070',
- weight => '3000',
- home => 'land',
- cat => 'vehicle',
- wheels => 4,
-};
-my $dirigible = {
- name => 'dirigible',
- speed => '0040',
- weight => '0000',
- home => 'air',
- cat => 'vehicle',
- # no "wheels" field -- test NULL/undef
-};
-my $elephant = {
- name => 'elephant',
- speed => '0020',
- weight => '6000',
- home => 'land',
- cat => 'vehicle',
- # no "wheels" field -- test NULL/undef
-};
-
-my $folder = Lucy::Store::RAMFolder->new;
-my $schema = SortSchema->new;
-my $indexer = Lucy::Index::Indexer->new(
- index => $folder,
- schema => $schema,
-);
-
-# Add vehicles.
-$indexer->add_doc($_) for ( $airplane, $bike, $car );
-
-$indexer->commit;
-
-my $polyreader = Lucy::Index::IndexReader->open( index => $folder );
-my $seg_reader = $polyreader->get_seg_readers->[0];
-my $sort_reader = $seg_reader->obtain("Lucy::Index::SortReader");
-my $doc_reader = $seg_reader->obtain("Lucy::Index::DocReader");
-my $segment = $seg_reader->get_segment;
-
-for my $field (qw( name speed weight home cat wheels )) {
- my $field_num = $segment->field_num($field);
- ok( $folder->exists("seg_1/sort-$field_num.ord"),
- "sort files written for $field" );
- my $sort_cache = $sort_reader->fetch_sort_cache($field);
- for ( 1 .. $seg_reader->doc_max ) {
- is( $sort_cache->value( ord => $sort_cache->ordinal($_) ),
- $doc_reader->fetch_doc($_)->{$field},
- "correct cached value doc $_ "
- );
- }
-}
-
-for my $field (qw( unused nope )) {
- my $field_num = $segment->field_num($field);
- ok( !$folder->exists("seg_1/sort-$field_num.ord"),
- "no sort files written for $field" );
-}
-
-# Add a second segment.
-$indexer = Lucy::Index::Indexer->new(
- index => $folder,
- schema => $schema,
- manager => NonMergingIndexManager->new,
-);
-$indexer->add_doc($dirigible);
-$indexer->commit;
-
-# Consolidate everything, to test merging.
-$indexer = Lucy::Index::Indexer->new(
- index => $folder,
- schema => $schema,
-);
-$indexer->delete_by_term( field => 'name', term => 'bike' );
-$indexer->add_doc($elephant);
-$indexer->optimize;
-$indexer->commit;
-
-my $num_old_seg_files = scalar grep {m/seg_[12]/} @{ $folder->list_r };
-is( $num_old_seg_files, 0, "all files from earlier segments zapped" );
-
-$polyreader = Lucy::Index::IndexReader->open( index => $folder );
-$seg_reader = $polyreader->get_seg_readers->[0];
-$sort_reader = $seg_reader->obtain("Lucy::Index::SortReader");
-$doc_reader = $seg_reader->obtain("Lucy::Index::DocReader");
-$segment = $seg_reader->get_segment;
-
-for my $field (qw( name speed weight home cat wheels )) {
- my $field_num = $segment->field_num($field);
- ok( $folder->exists("seg_3/sort-$field_num.ord"),
- "sort files written for $field" );
- my $sort_cache = $sort_reader->fetch_sort_cache($field);
- for ( 1 .. $seg_reader->doc_max ) {
- is( $sort_cache->value( ord => $sort_cache->ordinal($_) ),
- $doc_reader->fetch_doc($_)->{$field},
- "correct cached value field $field doc $_ "
- );
- }
-}
http://git-wip-us.apache.org/repos/asf/lucy/blob/be4e1833/perl/t/core/224-sort_writer.t
----------------------------------------------------------------------
diff --git a/perl/t/core/224-sort_writer.t b/perl/t/core/224-sort_writer.t
new file mode 100644
index 0000000..b987614
--- /dev/null
+++ b/perl/t/core/224-sort_writer.t
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use strict;
+use warnings;
+
+use Lucy::Test;
+my $success = Lucy::Test::run_tests("Lucy::Test::Index::TestSortWriter");
+
+exit($success ? 0 : 1);
+
[2/8] git commit: refs/heads/master - Initialize
SortFieldWriter#run_tick to 1
Posted by ma...@apache.org.
Initialize SortFieldWriter#run_tick to 1
Make sure we never use a run_tick of 0.
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/22663dea
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/22663dea
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/22663dea
Branch: refs/heads/master
Commit: 22663dea422a186700379b80021fb202ca982dbd
Parents: 0cf86d1
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Thu Sep 26 02:13:49 2013 +0200
Committer: Marvin Humphrey <ma...@rectangular.com>
Committed: Wed Jun 18 21:22:59 2014 -0700
----------------------------------------------------------------------
core/Lucy/Index/SortFieldWriter.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucy/blob/22663dea/core/Lucy/Index/SortFieldWriter.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/SortFieldWriter.c b/core/Lucy/Index/SortFieldWriter.c
index 8a372a3..f34b68e 100644
--- a/core/Lucy/Index/SortFieldWriter.c
+++ b/core/Lucy/Index/SortFieldWriter.c
@@ -92,7 +92,7 @@ SortFieldWriter_init(SortFieldWriter *self, Schema *schema,
ivars->sort_cache = NULL;
ivars->doc_map = NULL;
ivars->sorted_ids = NULL;
- ivars->run_tick = 0;
+ ivars->run_tick = 1;
ivars->ord_width = 0;
ivars->last_val = NULL;
@@ -217,7 +217,6 @@ SortFieldWriter_Add_Segment_IMP(SortFieldWriter *self, SegReader *reader,
run_ivars->run_max = SegReader_Doc_Max(reader);
run_ivars->run_cardinality = SortCache_Get_Cardinality(sort_cache);
run_ivars->null_ord = SortCache_Get_Null_Ord(sort_cache);
- run_ivars->run_tick = 1;
SortFieldWriter_Add_Run(self, (SortExternal*)run);
}
[8/8] git commit: refs/heads/master - Port t/028-sortexrun.t to C
Posted by ma...@apache.org.
Port t/028-sortexrun.t to C
Original commit by Nick Wellnhofer, amended by Marvin Humphrey for
changes to the SortExternal API.
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/eb10a8c6
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/eb10a8c6
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/eb10a8c6
Branch: refs/heads/master
Commit: eb10a8c6842c3483cff014a8a7b04de0ed5c0858
Parents: 73d6925
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Fri Oct 18 22:15:04 2013 +0200
Committer: Marvin Humphrey <ma...@rectangular.com>
Committed: Tue Jul 1 12:09:49 2014 -0700
----------------------------------------------------------------------
core/Lucy/Test/Util/TestSortExternal.c | 38 ++++++++++++++++++++++-
core/Lucy/Util/BBSortEx.c | 8 +++++
core/Lucy/Util/BBSortEx.cfh | 3 ++
perl/t/028-sortexrun.t | 48 -----------------------------
4 files changed, 48 insertions(+), 49 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucy/blob/eb10a8c6/core/Lucy/Test/Util/TestSortExternal.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Test/Util/TestSortExternal.c b/core/Lucy/Test/Util/TestSortExternal.c
index 2e04c83..19e83cf 100644
--- a/core/Lucy/Test/Util/TestSortExternal.c
+++ b/core/Lucy/Test/Util/TestSortExternal.c
@@ -269,9 +269,44 @@ test_sort_random_strings(TestBatchRunner *runner) {
DECREF(bytebufs);
}
+static void
+test_run(TestBatchRunner *runner) {
+ VArray *letters = VA_new(26);
+ for (int i = 0; i < 26; ++i) {
+ char ch = 'a' + i;
+ ByteBuf *bytebuf = BB_new_bytes(&ch, 1);
+ VA_Push(letters, (Obj*)bytebuf);
+ }
+ BBSortEx *run = BBSortEx_new(0x1000000, letters);
+ BBSortEx_Set_Mem_Thresh(run, 5);
+
+ BBSortEx_Refill(run);
+ TEST_INT_EQ(runner, BBSortEx_Buffer_Count(run), 5,
+ "Refill doesn't exceed memory threshold");
+
+ Obj *endpost = BBSortEx_Peek_Last(run);
+ ByteBuf *wanted = BB_new_bytes("e", 1);
+ TEST_TRUE(runner, BB_Equals(wanted, endpost), "Peek_Last");
+
+ VArray *elems = VA_new(26);
+ do {
+ while (BBSortEx_Buffer_Count(run) > 0) {
+ Obj *object = BBSortEx_Fetch(run);
+ VA_Push(elems, object);
+ }
+ } while (BBSortEx_Refill(run) > 0);
+ TEST_TRUE(runner, VA_Equals(elems, (Obj*)letters), "retrieve all elems");
+
+ DECREF(elems);
+ DECREF(wanted);
+ DECREF(endpost);
+ DECREF(letters);
+ DECREF(run);
+}
+
void
TestSortExternal_Run_IMP(TestSortExternal *self, TestBatchRunner *runner) {
- TestBatchRunner_Plan(runner, (TestBatch*)self, 16);
+ TestBatchRunner_Plan(runner, (TestBatch*)self, 19);
srand((unsigned int)time((time_t*)NULL));
S_init_bytebufs();
@@ -281,6 +316,7 @@ TestSortExternal_Run_IMP(TestSortExternal *self, TestBatchRunner *runner) {
test_sort_nothing(runner);
test_sort_packed_ints(runner);
test_sort_random_strings(runner);
+ test_run(runner);
S_destroy_bytebufs();
}
http://git-wip-us.apache.org/repos/asf/lucy/blob/eb10a8c6/core/Lucy/Util/BBSortEx.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Util/BBSortEx.c b/core/Lucy/Util/BBSortEx.c
index c9201bb..66e0568 100644
--- a/core/Lucy/Util/BBSortEx.c
+++ b/core/Lucy/Util/BBSortEx.c
@@ -183,6 +183,14 @@ BBSortEx_Peek_Cache_IMP(BBSortEx *self) {
return retval;
}
+Obj*
+BBSortEx_Peek_Last_IMP(BBSortEx *self) {
+ BBSortExIVARS *const ivars = BBSortEx_IVARS(self);
+ uint32_t count = ivars->buf_max - ivars->buf_tick;
+ if (count == 0) { return NULL; }
+ return INCREF(ivars->buffer[count-1]);
+}
+
uint32_t
BBSortEx_Get_Num_Runs_IMP(BBSortEx *self) {
BBSortExIVARS *const ivars = BBSortEx_IVARS(self);
http://git-wip-us.apache.org/repos/asf/lucy/blob/eb10a8c6/core/Lucy/Util/BBSortEx.cfh
----------------------------------------------------------------------
diff --git a/core/Lucy/Util/BBSortEx.cfh b/core/Lucy/Util/BBSortEx.cfh
index 6fd8656..a43b1fd 100644
--- a/core/Lucy/Util/BBSortEx.cfh
+++ b/core/Lucy/Util/BBSortEx.cfh
@@ -54,6 +54,9 @@ class Lucy::Util::BBSortEx
incremented VArray*
Peek_Cache(BBSortEx *self);
+ incremented nullable Obj*
+ Peek_Last(BBSortEx *self);
+
uint32_t
Get_Num_Runs(BBSortEx *self);
http://git-wip-us.apache.org/repos/asf/lucy/blob/eb10a8c6/perl/t/028-sortexrun.t
----------------------------------------------------------------------
diff --git a/perl/t/028-sortexrun.t b/perl/t/028-sortexrun.t
deleted file mode 100644
index f0df52a..0000000
--- a/perl/t/028-sortexrun.t
+++ /dev/null
@@ -1,48 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-use strict;
-use warnings;
-use lib 'buildlib';
-
-use Test::More skip_all => 'Disabled until test ported to C';
-#use Test::More tests => 5;
-use Lucy::Test;
-use Clownfish qw( to_perl );
-
-my $letters = Clownfish::VArray->new;
-$letters->push( Clownfish::ByteBuf->new($_) ) for 'a' .. 'z';
-my $run = Lucy::Util::BBSortEx->new( external => $letters );
-$run->set_mem_thresh(5);
-
-my $num_in_cache = $run->refill;
-is( $run->cache_count, 5, "Read_Elem puts the brakes on Refill" );
-my $endpost = $run->peek_last;
-is( $endpost, 'e', "Peek_Last" );
-$endpost = Clownfish::ByteBuf->new('b');
-my $slice = $run->pop_slice($endpost);
-is( scalar @$slice, 2, "Pop_Slice gets only less-than-or-equal elems" );
-@$slice = map { to_perl($_) } @$slice;
-is_deeply( $slice, [qw( a b )], "Pop_Slice picks highest elems" );
-
-my @got = qw( a b );
-while (1) {
- $endpost = $run->peek_last;
- $slice = $run->pop_slice( Clownfish::ByteBuf->new($endpost) );
- push @got, map { to_perl($_) } @$slice;
- last unless $run->refill;
-}
-is_deeply( \@got, [ 'a' .. 'z' ], "retrieve all elems" );
-
[5/8] git commit: refs/heads/master - Use counting sort to sort
doc_ids in SortFieldWriter#Refill
Posted by ma...@apache.org.
Use counting sort to sort doc_ids in SortFieldWriter#Refill
Since we already have the ordinals for each doc_id, we can use a
counting sort. This uses a temporary array of size run_cardinality but
runs in linear time.
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/d96ef18e
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/d96ef18e
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/d96ef18e
Branch: refs/heads/master
Commit: d96ef18e4c8e1ef19ce55be5926a0728254df30d
Parents: dd1a198
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Thu Sep 26 19:43:42 2013 +0200
Committer: Marvin Humphrey <ma...@rectangular.com>
Committed: Wed Jun 18 21:23:00 2014 -0700
----------------------------------------------------------------------
core/Lucy/Index/SortFieldWriter.c | 53 +++++++++++++++++++++-------------
1 file changed, 33 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucy/blob/d96ef18e/core/Lucy/Index/SortFieldWriter.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/SortFieldWriter.c b/core/Lucy/Index/SortFieldWriter.c
index 0d07ce5..b6faec5 100644
--- a/core/Lucy/Index/SortFieldWriter.c
+++ b/core/Lucy/Index/SortFieldWriter.c
@@ -347,30 +347,43 @@ SortFieldWriter_Compare_IMP(SortFieldWriter *self, void *va, void *vb) {
return comparison;
}
-static int
-S_compare_doc_ids_by_ord_rev(void *context, const void *va, const void *vb) {
- SortCache *sort_cache = (SortCache*)context;
- int32_t a = *(int32_t*)va;
- int32_t b = *(int32_t*)vb;
- int32_t ord_a = SortCache_Ordinal(sort_cache, a);
- int32_t ord_b = SortCache_Ordinal(sort_cache, b);
- int32_t comparison = ord_a - ord_b;
- if (comparison == 0) { comparison = a - b; }
- return comparison;
-}
-
static void
S_lazy_init_sorted_ids(SortFieldWriter *self) {
SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);
- if (!ivars->sorted_ids) {
- ivars->sorted_ids
- = (int32_t*)MALLOCATE((ivars->run_max + 1) * sizeof(int32_t));
- for (int32_t i = 0, max = ivars->run_max; i <= max; i++) {
- ivars->sorted_ids[i] = i;
- }
- Sort_quicksort(ivars->sorted_ids + 1, ivars->run_max, sizeof(int32_t),
- S_compare_doc_ids_by_ord_rev, ivars->sort_cache);
+ if (ivars->sorted_ids) { return; }
+
+ // Counting sort. Could be optimized by working directly on the
+ // ordinal arrays.
+
+ SortCache *sort_cache = ivars->sort_cache;
+ int32_t run_cardinality = ivars->run_cardinality;
+ int32_t run_max = ivars->run_max;
+
+ // Count.
+ int32_t *counts = (int32_t*)CALLOCATE(run_cardinality, sizeof(int32_t));
+ for (int32_t doc_id = 0; doc_id <= run_max; ++doc_id) {
+ int32_t ord = SortCache_Ordinal(sort_cache, doc_id);
+ ++counts[ord];
+ }
+
+ // Compute partial sums.
+ int32_t sum = 0;
+ for (int32_t ord = 0; ord < run_cardinality; ++ord) {
+ int32_t count = counts[ord];
+ counts[ord] = sum;
+ sum += count;
}
+
+ // Distribute.
+ int32_t *sorted_ids = (int32_t*)MALLOCATE((run_max + 1) * sizeof(int32_t));
+ for (int32_t doc_id = 0; doc_id <= run_max; ++doc_id) {
+ int32_t ord = SortCache_Ordinal(sort_cache, doc_id);
+ int32_t pos = counts[ord]++;
+ sorted_ids[pos] = doc_id;
+ }
+
+ ivars->sorted_ids = sorted_ids;
+ FREEMEM(counts);
}
void