You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by nw...@apache.org on 2015/08/06 18:22:17 UTC
[1/5] lucy git commit: Switch over to XSBind_hash_key_to_utf8
Repository: lucy
Updated Branches:
refs/heads/master 199561eaf -> 7c09f4df5
Switch over to XSBind_hash_key_to_utf8
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/78a82999
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/78a82999
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/78a82999
Branch: refs/heads/master
Commit: 78a82999f9e60c78bb470e99eda82e5446ae4e84
Parents: 35b4c52
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Sat Aug 1 17:19:53 2015 +0200
Committer: Nick Wellnhofer <we...@aevum.de>
Committed: Tue Aug 4 21:57:40 2015 +0200
----------------------------------------------------------------------
perl/xs/Lucy/Index/Inverter.c | 21 +++------------------
1 file changed, 3 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucy/blob/78a82999/perl/xs/Lucy/Index/Inverter.c
----------------------------------------------------------------------
diff --git a/perl/xs/Lucy/Index/Inverter.c b/perl/xs/Lucy/Index/Inverter.c
index dfd436d..eed3a66 100644
--- a/perl/xs/Lucy/Index/Inverter.c
+++ b/perl/xs/Lucy/Index/Inverter.c
@@ -32,25 +32,10 @@ static lucy_InverterEntry*
S_fetch_entry(pTHX_ lucy_Inverter *self, HE *hash_entry) {
lucy_InverterIVARS *const ivars = lucy_Inverter_IVARS(self);
lucy_Schema *const schema = ivars->schema;
- char *key;
- STRLEN key_len;
- STRLEN he_key_len = HeKLEN(hash_entry);
+ STRLEN key_size;
+ const char *key = XSBind_hash_key_to_utf8(aTHX_ hash_entry, &key_size);
- // Force field name to UTF-8 if necessary.
- if (he_key_len == (STRLEN)HEf_SVKEY) {
- SV *key_sv = HeKEY_sv(hash_entry);
- key = SvPVutf8(key_sv, key_len);
- }
- else {
- key = HeKEY(hash_entry);
- key_len = he_key_len;
- if (!cfish_StrHelp_utf8_valid(key, key_len)) {
- SV *key_sv = HeSVKEY_force(hash_entry);
- key = SvPVutf8(key_sv, key_len);
- }
- }
-
- cfish_String *field = CFISH_SSTR_WRAP_UTF8(key, key_len);
+ cfish_String *field = CFISH_SSTR_WRAP_UTF8(key, key_size);
int32_t field_num = LUCY_Seg_Field_Num(ivars->segment, field);
if (!field_num) {
// This field seems not to be in the segment yet. Try to find it in
[2/5] lucy git commit: Fix hv_fetch with UTF-8 keys
Posted by nw...@apache.org.
Fix hv_fetch with UTF-8 keys
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/35b4c52e
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/35b4c52e
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/35b4c52e
Branch: refs/heads/master
Commit: 35b4c52ebaf789920276154a150615896e8edc47
Parents: 199561e
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Sat Aug 1 17:19:13 2015 +0200
Committer: Nick Wellnhofer <we...@aevum.de>
Committed: Tue Aug 4 21:57:40 2015 +0200
----------------------------------------------------------------------
perl/xs/Lucy/Document/Doc.c | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucy/blob/35b4c52e/perl/xs/Lucy/Document/Doc.c
----------------------------------------------------------------------
diff --git a/perl/xs/Lucy/Document/Doc.c b/perl/xs/Lucy/Document/Doc.c
index 2f54da7..fff4e62 100644
--- a/perl/xs/Lucy/Document/Doc.c
+++ b/perl/xs/Lucy/Document/Doc.c
@@ -150,7 +150,7 @@ LUCY_Doc_Extract_IMP(lucy_Doc *self, cfish_String *field) {
lucy_DocIVARS *const ivars = lucy_Doc_IVARS(self);
cfish_Obj *retval = NULL;
SV **sv_ptr = hv_fetch((HV*)ivars->fields, CFISH_Str_Get_Ptr8(field),
- CFISH_Str_Get_Size(field), 0);
+ -CFISH_Str_Get_Size(field), 0);
if (sv_ptr) {
retval = XSBind_perl_to_cfish(aTHX_ *sv_ptr);
@@ -218,8 +218,20 @@ LUCY_Doc_Equals_IMP(lucy_Doc *self, cfish_Obj *other) {
while (num_fields--) {
HE *my_entry = hv_iternext(my_fields);
SV *my_val_sv = HeVAL(my_entry);
- STRLEN key_len = HeKLEN(my_entry);
- char *key = HeKEY(my_entry);
+ STRLEN key_len;
+ char *key;
+
+ if (HeKLEN(my_entry) == HEf_SVKEY) {
+ SV *key_sv = HeKEY_sv(my_entry);
+ key = SvPV(key_sv, key_len);
+ if (SvUTF8(key_sv)) { key_len = -key_len; }
+ }
+ else {
+ key_len = HeKLEN(my_entry);
+ key = key_len ? HeKEY(my_entry) : Nullch;
+ if (HeKUTF8(my_entry)) { key_len = -key_len; }
+ }
+
SV **const other_val = hv_fetch(other_fields, key, key_len, 0);
if (!other_val) { return false; }
if (!sv_eq(my_val_sv, *other_val)) { return false; }
[5/5] lucy git commit: Port Lucy::Simple tests to C
Posted by nw...@apache.org.
Port Lucy::Simple tests to C
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/7c09f4df
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/7c09f4df
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/7c09f4df
Branch: refs/heads/master
Commit: 7c09f4df572acc3949f9a8c409a00fa3876467aa
Parents: ebde55f
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Sat Aug 1 18:12:34 2015 +0200
Committer: Nick Wellnhofer <we...@aevum.de>
Committed: Wed Aug 5 15:26:15 2015 +0200
----------------------------------------------------------------------
core/Lucy/Test.c | 2 +
core/Lucy/Test/TestSimple.c | 112 +++++++++++++++++++++++++++++++++++++
core/Lucy/Test/TestSimple.cfh | 29 ++++++++++
perl/t/core/308-simple.t | 23 ++++++++
4 files changed, 166 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucy/blob/7c09f4df/core/Lucy/Test.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Test.c b/core/Lucy/Test.c
index 3046494..0edda50 100644
--- a/core/Lucy/Test.c
+++ b/core/Lucy/Test.c
@@ -77,6 +77,7 @@
#include "Lucy/Test/Store/TestRAMFileHandle.h"
#include "Lucy/Test/Store/TestRAMFolder.h"
#include "Lucy/Test/TestSchema.h"
+#include "Lucy/Test/TestSimple.h"
#include "Lucy/Test/Util/TestFreezer.h"
#include "Lucy/Test/Util/TestIndexFileNames.h"
#include "Lucy/Test/Util/TestJson.h"
@@ -136,6 +137,7 @@ Test_create_test_suite() {
TestSuite_Add_Batch(suite, (TestBatch*)TestFType_new());
TestSuite_Add_Batch(suite, (TestBatch*)TestSeg_new());
TestSuite_Add_Batch(suite, (TestBatch*)TestHighlighter_new());
+ TestSuite_Add_Batch(suite, (TestBatch*)TestSimple_new());
TestSuite_Add_Batch(suite, (TestBatch*)TestSpan_new());
TestSuite_Add_Batch(suite, (TestBatch*)TestHeatMap_new());
TestSuite_Add_Batch(suite, (TestBatch*)TestTermQuery_new());
http://git-wip-us.apache.org/repos/asf/lucy/blob/7c09f4df/core/Lucy/Test/TestSimple.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Test/TestSimple.c b/core/Lucy/Test/TestSimple.c
new file mode 100644
index 0000000..6e8adfd
--- /dev/null
+++ b/core/Lucy/Test/TestSimple.c
@@ -0,0 +1,112 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define C_TESTLUCY_TESTSIMPLE
+#define TESTLUCY_USE_SHORT_NAMES
+#include "Lucy/Util/ToolSet.h"
+
+#include "Lucy/Test/TestSimple.h"
+#include "Lucy/Simple.h"
+
+#include "Clownfish/TestHarness/TestBatchRunner.h"
+#include "Lucy/Document/Doc.h"
+#include "Lucy/Document/HitDoc.h"
+#include "Lucy/Store/RAMFolder.h"
+
+TestSimple*
+TestSimple_new() {
+ return (TestSimple*)Class_Make_Obj(TESTSIMPLE);
+}
+
+static void
+test_simple(TestBatchRunner *runner) {
+ RAMFolder *folder = RAMFolder_new(NULL);
+ String *language = SSTR_WRAP_UTF8("en", 2);
+ Simple *lucy = Simple_new((Obj*)folder, language);
+
+ String *food_field = SSTR_WRAP_UTF8("food", 4);
+
+ {
+ Doc *doc = Doc_new(NULL, 0);
+ String *value = SSTR_WRAP_UTF8("creamed corn", 12);
+ Doc_Store(doc, food_field, (Obj*)value);
+ Simple_Add_Doc(lucy, doc);
+ DECREF(doc);
+
+ String *query = SSTR_WRAP_UTF8("creamed", 7);
+ uint32_t num_results = Simple_Search(lucy, query, 0, 10);
+ TEST_INT_EQ(runner, num_results, 1, "Search works right after add");
+ }
+
+ {
+ Doc *doc = Doc_new(NULL, 0);
+ String *value = SSTR_WRAP_UTF8("creamed spinach", 15);
+ Doc_Store(doc, food_field, (Obj*)value);
+ Simple_Add_Doc(lucy, doc);
+ DECREF(doc);
+
+ String *query = SSTR_WRAP_UTF8("creamed", 7);
+ uint32_t num_results = Simple_Search(lucy, query, 0, 10);
+ TEST_INT_EQ(runner, num_results, 2, "Search returns total hits");
+ }
+
+ {
+ Doc *doc = Doc_new(NULL, 0);
+ String *value = SSTR_WRAP_UTF8("creamed broccoli", 16);
+ Doc_Store(doc, food_field, (Obj*)value);
+ Simple_Add_Doc(lucy, doc);
+ DECREF(doc);
+
+ DECREF(lucy);
+ lucy = Simple_new((Obj*)folder, language);
+
+ String *query = SSTR_WRAP_UTF8("cream", 5);
+ uint32_t num_results = Simple_Search(lucy, query, 0, 10);
+ TEST_INT_EQ(runner, num_results, 3, "commit upon destroy");
+
+ HitDoc *hit;
+ while ((hit = Simple_Next(lucy)) != NULL) {
+ String *food = (String*)HitDoc_Extract(hit, food_field);
+ TEST_TRUE(runner, Str_Starts_With_Utf8(food, "cream", 5), "Next");
+ DECREF(food);
+ DECREF(hit);
+ }
+ }
+
+ {
+ Doc *doc = Doc_new(NULL, 0);
+ String *band_field = SSTR_WRAP_UTF8("band", 4);
+ String *value = SSTR_WRAP_UTF8("Cream", 5);
+ Doc_Store(doc, band_field, (Obj*)value);
+ Simple_Add_Doc(lucy, doc);
+ DECREF(doc);
+
+ String *query = SSTR_WRAP_UTF8("cream", 5);
+ uint32_t num_results = Simple_Search(lucy, query, 0, 10);
+ TEST_INT_EQ(runner, num_results, 4,
+ "Search uses correct EasyAnalyzer");
+ }
+
+ DECREF(lucy);
+ DECREF(folder);
+}
+
+void
+TestSimple_Run_IMP(TestSimple *self, TestBatchRunner *runner) {
+ TestBatchRunner_Plan(runner, (TestBatch*)self, 7);
+ test_simple(runner);
+}
+
http://git-wip-us.apache.org/repos/asf/lucy/blob/7c09f4df/core/Lucy/Test/TestSimple.cfh
----------------------------------------------------------------------
diff --git a/core/Lucy/Test/TestSimple.cfh b/core/Lucy/Test/TestSimple.cfh
new file mode 100644
index 0000000..045fe8c
--- /dev/null
+++ b/core/Lucy/Test/TestSimple.cfh
@@ -0,0 +1,29 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+parcel TestLucy;
+
+class Lucy::Test::TestSimple
+ inherits Clownfish::TestHarness::TestBatch {
+
+ inert incremented TestSimple*
+ new();
+
+ void
+ Run(TestSimple *self, TestBatchRunner *runner);
+}
+
+
http://git-wip-us.apache.org/repos/asf/lucy/blob/7c09f4df/perl/t/core/308-simple.t
----------------------------------------------------------------------
diff --git a/perl/t/core/308-simple.t b/perl/t/core/308-simple.t
new file mode 100644
index 0000000..5a558a1
--- /dev/null
+++ b/perl/t/core/308-simple.t
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use strict;
+use warnings;
+
+use Lucy::Test;
+my $success = Lucy::Test::run_tests("Lucy::Test::TestSimple");
+
+exit($success ? 0 : 1);
+
[4/5] lucy git commit: Port Lucy::Simple to C
Posted by nw...@apache.org.
Port Lucy::Simple to C
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/ebde55f3
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/ebde55f3
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/ebde55f3
Branch: refs/heads/master
Commit: ebde55f3716e3f746095ce957054b1eecf936a4d
Parents: f1c3021
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Sat Aug 1 03:43:00 2015 +0200
Committer: Nick Wellnhofer <we...@aevum.de>
Committed: Wed Aug 5 15:26:15 2015 +0200
----------------------------------------------------------------------
core/Lucy/Simple.c | 190 +++++++++++++++++++
core/Lucy/Simple.cfh | 99 ++++++++++
perl/buildlib/Lucy/Build/Binding/Misc.pm | 107 +++++++++++
perl/lib/Lucy/Simple.pm | 261 --------------------------
4 files changed, 396 insertions(+), 261 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucy/blob/ebde55f3/core/Lucy/Simple.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Simple.c b/core/Lucy/Simple.c
new file mode 100644
index 0000000..2271984
--- /dev/null
+++ b/core/Lucy/Simple.c
@@ -0,0 +1,190 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define CFISH_USE_SHORT_NAMES
+#define LUCY_USE_SHORT_NAMES
+
+#define C_LUCY_SIMPLE
+#include "Lucy/Simple.h"
+
+#include "Clownfish/Err.h"
+#include "Clownfish/Hash.h"
+#include "Clownfish/HashIterator.h"
+#include "Clownfish/String.h"
+#include "Clownfish/Vector.h"
+#include "Lucy/Analysis/EasyAnalyzer.h"
+#include "Lucy/Document/Doc.h"
+#include "Lucy/Document/HitDoc.h"
+#include "Lucy/Index/Indexer.h"
+#include "Lucy/Index/PolyReader.h"
+#include "Lucy/Plan/FullTextType.h"
+#include "Lucy/Plan/Schema.h"
+#include "Lucy/Search/Hits.h"
+#include "Lucy/Search/IndexSearcher.h"
+
+Simple*
+Simple_new(Obj *index, String *language) {
+ Simple *self = (Simple*)Class_Make_Obj(SIMPLE);
+ return Simple_init(self, index, language);
+}
+
+Simple*
+Simple_init(Simple *self, Obj *index, String *language) {
+ SimpleIVARS *const ivars = Simple_IVARS(self);
+ ivars->index = INCREF(index);
+ ivars->language = Str_Clone(language);
+ return self;
+}
+
+void
+Simple_Destroy_IMP(Simple *self) {
+ SimpleIVARS *const ivars = Simple_IVARS(self);
+
+ Simple_Finish_Indexing(self);
+
+ DECREF(ivars->index);
+ DECREF(ivars->language);
+ DECREF(ivars->schema);
+ DECREF(ivars->type);
+ DECREF(ivars->indexer);
+ DECREF(ivars->searcher);
+ DECREF(ivars->hits);
+
+ SUPER_DESTROY(self, SIMPLE);
+}
+
+static void
+S_create_indexer(Simple *self) {
+ SimpleIVARS *const ivars = Simple_IVARS(self);
+
+ // Trigger searcher refresh.
+ DECREF(ivars->searcher);
+ DECREF(ivars->hits);
+ ivars->searcher = NULL;
+ ivars->hits = NULL;
+
+ // Get type and schema
+ Schema *schema = NULL;
+ FieldType *type = NULL;
+ PolyReader *reader = PolyReader_open(ivars->index, NULL, NULL);
+ Vector *seg_readers = PolyReader_Get_Seg_Readers(reader);
+
+ if (Vec_Get_Size(seg_readers) == 0) {
+ // Index is empty, create new schema and type.
+ schema = Schema_new();
+ EasyAnalyzer *analyzer = EasyAnalyzer_new(ivars->language);
+ type = (FieldType*)FullTextType_new((Analyzer*)analyzer);
+ DECREF(analyzer);
+ }
+ else {
+ // Get schema from reader.
+ schema = (Schema*)INCREF(PolyReader_Get_Schema(reader));
+ Vector *fields = Schema_All_Fields(schema);
+ String *field = (String*)CERTIFY(Vec_Fetch(fields, 0), STRING);
+ type = (FieldType*)INCREF(Schema_Fetch_Type(schema, field));
+ DECREF(fields);
+ }
+
+ ivars->indexer = Indexer_new(schema, ivars->index, NULL, 0);
+ ivars->schema = schema;
+ ivars->type = type;
+
+ DECREF(reader);
+}
+
+void
+Simple_Add_Doc_IMP(Simple *self, Doc *doc) {
+ SimpleIVARS *const ivars = Simple_IVARS(self);
+
+ if (!ivars->indexer) {
+ S_create_indexer(self);
+ }
+
+ Vector *field_names = Doc_Field_Names(doc);
+
+ for (size_t i = 0, max = Vec_Get_Size(field_names); i < max; i++) {
+ String *field = (String*)Vec_Fetch(field_names, i);
+ Schema_Spec_Field(ivars->schema, field, ivars->type);
+ }
+
+ Indexer_Add_Doc(ivars->indexer, doc, 1.0);
+
+ DECREF(field_names);
+}
+
+uint32_t
+Simple_Search_IMP(Simple *self, String *query, uint32_t offset,
+ uint32_t num_wanted) {
+ SimpleIVARS *const ivars = Simple_IVARS(self);
+
+ // Flush recent adds; lazily create searcher.
+ Simple_Finish_Indexing(self);
+ if (!ivars->searcher) {
+ ivars->searcher = IxSearcher_new(ivars->index);
+ }
+
+ DECREF(ivars->hits);
+ ivars->hits = IxSearcher_Hits(ivars->searcher, (Obj*)query, offset,
+ num_wanted, NULL);
+
+ return Hits_Total_Hits(ivars->hits);
+}
+
+HitDoc*
+Simple_Next_IMP(Simple *self) {
+ SimpleIVARS *const ivars = Simple_IVARS(self);
+
+ if (!ivars->hits) { return NULL; }
+
+ // Get the hit, bail if hits are exhausted.
+ HitDoc *doc = Hits_Next(ivars->hits);
+ if (!doc) {
+ DECREF(ivars->hits);
+ ivars->hits = NULL;
+ }
+
+ return doc;
+}
+
+Indexer*
+Simple_Get_Indexer_IMP(Simple *self) {
+ SimpleIVARS *const ivars = Simple_IVARS(self);
+
+ if (!ivars->indexer) {
+ S_create_indexer(self);
+ }
+
+ return ivars->indexer;
+}
+
+void
+Simple_Finish_Indexing_IMP(Simple *self) {
+ SimpleIVARS *const ivars = Simple_IVARS(self);
+
+ // Don't bother to throw an error if index not modified.
+ if (ivars->indexer) {
+ Indexer_Commit(ivars->indexer);
+
+ // Trigger indexer refresh; the searcher is reset in S_create_indexer.
+ DECREF(ivars->schema);
+ DECREF(ivars->type);
+ DECREF(ivars->indexer);
+ ivars->schema = NULL;
+ ivars->type = NULL;
+ ivars->indexer = NULL;
+ }
+}
+
http://git-wip-us.apache.org/repos/asf/lucy/blob/ebde55f3/core/Lucy/Simple.cfh
----------------------------------------------------------------------
diff --git a/core/Lucy/Simple.cfh b/core/Lucy/Simple.cfh
new file mode 100644
index 0000000..3680ce5
--- /dev/null
+++ b/core/Lucy/Simple.cfh
@@ -0,0 +1,99 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+parcel Lucy;
+
+/** Basic search engine.
+ *
+ * Lucy::Simple is a stripped-down interface for the Apache Lucy search
+ * engine library.
+ */
+public class Lucy::Simple {
+
+ Obj *index;
+ String *language;
+ Schema *schema;
+ FieldType *type;
+ Indexer *indexer;
+ IndexSearcher *searcher;
+ Hits *hits;
+
+ /** Create a Lucy::Simple object, which can be used for both indexing and
+ * searching. Both parameters `path` and `language` are required.
+ *
+ * @param path Where the index directory should be located. If no index
+ * is found at the specified location, one will be created.
+ * @param language The language of the documents in your collection,
+ * indicated by a two-letter ISO code. 12 languages are supported:
+ *
+ * |-----------------------|
+ * | Language | ISO code |
+ * |-----------------------|
+ * | Danish | da |
+ * | Dutch | nl |
+ * | English | en |
+ * | Finnish | fi |
+ * | French | fr |
+ * | German | de |
+ * | Italian | it |
+ * | Norwegian | no |
+ * | Portuguese | pt |
+ * | Spanish | es |
+ * | Swedish | sv |
+ * | Russian | ru |
+ * |-----------------------|
+ */
+ public inert Simple*
+ new(Obj *path, String *language);
+
+ public inert Simple*
+ init(Simple *self, Obj *path, String *language);
+
+ /** Add a document to the index.
+ */
+ public void
+ Add_Doc(Simple *self, Doc *doc);
+
+ /** Search the index. Returns the total number of documents which match
+ * the query. (This number is unlikely to match `num_wanted`.)
+ *
+ * @param query A search query string.
+ * @param offset The number of most-relevant hits to discard, typically
+ * used when "paging" through hits N at a time. Setting offset to 20 and
+ * num_wanted to 10 retrieves hits 21-30, assuming that 30 hits can be
+ * found.
+ * @param num_wanted The number of hits you would like to see after
+ * `offset` is taken into account.
+ */
+ public uint32_t
+ Search(Simple *self, String *query, uint32_t offset = 0,
+ uint32_t num_wanted = 10);
+
+ /** Return the next hit, or [](cfish:@null) when the iterator is exhausted.
+ */
+ public incremented nullable HitDoc*
+ Next(Simple *self);
+
+ Indexer*
+ Get_Indexer(Simple *self);
+
+ void
+ Finish_Indexing(Simple *self);
+
+ public void
+ Destroy(Simple *self);
+}
+
http://git-wip-us.apache.org/repos/asf/lucy/blob/ebde55f3/perl/buildlib/Lucy/Build/Binding/Misc.pm
----------------------------------------------------------------------
diff --git a/perl/buildlib/Lucy/Build/Binding/Misc.pm b/perl/buildlib/Lucy/Build/Binding/Misc.pm
index ebd4fea..92c8b95 100644
--- a/perl/buildlib/Lucy/Build/Binding/Misc.pm
+++ b/perl/buildlib/Lucy/Build/Binding/Misc.pm
@@ -26,6 +26,7 @@ sub bind_all {
$hierarchy->inherit_metadata;
$class->bind_lucy;
+ $class->bind_simple;
$class->bind_test;
}
@@ -148,6 +149,112 @@ END_XS_CODE
Clownfish::CFC::Binding::Perl::Class->register($binding);
}
+sub bind_simple {
+ my @exposed = qw(
+ Search
+ Next
+ );
+ my @hand_rolled = qw( Add_Doc );
+
+ my $pod_spec = Clownfish::CFC::Binding::Perl::Pod->new;
+ my $synopsis = <<'END_SYNOPSIS';
+First, build an index of your documents.
+
+ my $index = Lucy::Simple->new(
+ path => '/path/to/index/',
+ language => 'en',
+ );
+
+ while ( my ( $title, $content ) = each %source_docs ) {
+ $index->add_doc({
+ title => $title,
+ content => $content,
+ });
+ }
+
+Later, search the index.
+
+ my $total_hits = $index->search(
+ query => $query_string,
+ offset => 0,
+ num_wanted => 10,
+ );
+
+ print "Total hits: $total_hits\n";
+ while ( my $hit = $index->next ) {
+ print "$hit->{title}\n";
+ }
+END_SYNOPSIS
+ my $add_doc_pod = <<'END_ADD_DOC_POD';
+=head2 add_doc
+
+ $lucy->add_doc({
+ location => $url,
+ title => $title,
+ content => $content,
+ });
+
+Add a document to the index. The document must be supplied as a hashref,
+with field names as keys and content as values.
+
+END_ADD_DOC_POD
+ $pod_spec->set_synopsis($synopsis);
+
+ # Override is necessary because there's no standard way to explain
+ # hash/hashref across multiple host languages.
+ $pod_spec->add_method(
+ method => 'Add_Doc',
+ alias => 'add_doc',
+ pod => $add_doc_pod,
+ );
+ $pod_spec->add_method( method => $_, alias => lc($_) ) for @exposed;
+
+ my $xs_code = <<'END_XS_CODE';
+MODULE = Lucy PACKAGE = Lucy::Simple
+
+void
+add_doc(self, doc_sv)
+ lucy_Simple *self;
+ SV *doc_sv;
+PPCODE:
+{
+ lucy_Doc *doc = NULL;
+
+ // Either get a Doc or use the stock doc.
+ if (sv_isobject(doc_sv)
+ && sv_derived_from(doc_sv, "Lucy::Document::Doc")
+ ) {
+ IV tmp = SvIV(SvRV(doc_sv));
+ doc = INT2PTR(lucy_Doc*, tmp);
+ }
+ else if (XSBind_sv_defined(aTHX_ doc_sv) && SvROK(doc_sv)) {
+ HV *maybe_fields = (HV*)SvRV(doc_sv);
+ if (SvTYPE((SV*)maybe_fields) == SVt_PVHV) {
+ lucy_Indexer *indexer = LUCY_Simple_Get_Indexer(self);
+ doc = LUCY_Indexer_Get_Stock_Doc(indexer);
+ LUCY_Doc_Set_Fields(doc, maybe_fields);
+ }
+ }
+ if (!doc) {
+ THROW(CFISH_ERR, "Need either a hashref or a %o",
+ CFISH_Class_Get_Name(LUCY_DOC));
+ }
+
+ LUCY_Simple_Add_Doc(self, doc);
+}
+END_XS_CODE
+
+ my $binding = Clownfish::CFC::Binding::Perl::Class->new(
+ parcel => "Lucy",
+ class_name => "Lucy::Simple",
+ );
+ $binding->exclude_method($_) for @hand_rolled;
+ $binding->append_xs($xs_code);
+ $binding->set_pod_spec($pod_spec);
+
+ Clownfish::CFC::Binding::Perl::Class->register($binding);
+}
+
sub bind_test {
my $xs_code = <<'END_XS_CODE';
MODULE = Lucy PACKAGE = Lucy::Test
http://git-wip-us.apache.org/repos/asf/lucy/blob/ebde55f3/perl/lib/Lucy/Simple.pm
----------------------------------------------------------------------
diff --git a/perl/lib/Lucy/Simple.pm b/perl/lib/Lucy/Simple.pm
index e409615..e56a6ba 100644
--- a/perl/lib/Lucy/Simple.pm
+++ b/perl/lib/Lucy/Simple.pm
@@ -13,274 +13,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-use strict;
-use warnings;
-
package Lucy::Simple;
use Lucy;
our $VERSION = '0.004000';
$VERSION = eval $VERSION;
-use Carp;
-use Scalar::Util qw( weaken reftype refaddr );
-
-use Lucy::Plan::Schema;
-use Lucy::Analysis::EasyAnalyzer;
-use Lucy::Index::Indexer;
-use Lucy::Search::IndexSearcher;
-
-my %obj_cache;
-
-sub new {
- my ( $either, %args ) = @_;
- my $path = delete $args{path};
- my $language = lc( delete $args{language} );
- confess("Missing required parameter 'path'") unless defined $path;
- confess("Invalid language: '$language'")
- unless $language =~ /^(?:da|de|en|es|fi|fr|it|nl|no|pt|ru|sv)$/;
- my @remaining = keys %args;
- confess("Invalid params: @remaining") if @remaining;
- my $self = bless {
- type => undef,
- schema => undef,
- indexer => undef,
- searcher => undef,
- hits => undef,
- language => $language,
- path => $path,
- },
- ref($either) || $either;
-
- # Cache the object for later clean-up.
- weaken( $obj_cache{ refaddr $self } = $self );
-
- return $self;
-}
-
-sub _lazily_create_indexer {
- my $self = shift;
- if ( !defined $self->{indexer} ) {
- # Get type and schema
- my $schema;
- my $reader = Lucy::Index::PolyReader->open( index => $self->{path} );
- if ( !@{ $reader->seg_readers } ) {
- # index is empty, create new schema and type
- $schema = Lucy::Plan::Schema->new;
- my $analyzer = Lucy::Analysis::EasyAnalyzer->new(
- language => $self->{language}, );
- $self->{type}
- = Lucy::Plan::FullTextType->new( analyzer => $analyzer, );
- }
- else {
- # get schema from reader
- $schema = $reader->get_schema;
- my $field = $schema->all_fields->[0];
- $self->{type} = $schema->fetch_type($field);
- }
- $self->{schema} = $schema;
- $self->{indexer} = Lucy::Index::Indexer->new(
- schema => $schema,
- index => $self->{path},
- );
- }
-}
-
-sub add_doc {
- my ( $self, $hashref ) = @_;
- croak("add_doc requires exactly one argument: a hashref")
- unless ( @_ == 2 and reftype($hashref) eq 'HASH' );
- $self->_lazily_create_indexer;
- my $schema = $self->{schema};
- my $type = $self->{type};
- $schema->spec_field( name => $_, type => $type ) for keys %$hashref;
- $self->{indexer}->add_doc($hashref);
-}
-
-sub _finish_indexing {
- my $self = shift;
-
- # Don't bother to throw an error if index not modified.
- if ( defined $self->{indexer} ) {
- $self->{indexer}->commit;
-
- # Trigger searcher and indexer refresh.
- undef $self->{indexer};
- undef $self->{searcher};
- }
-}
-
-sub search {
- my ( $self, %args ) = @_;
-
- # Flush recent adds; lazily create searcher.
- $self->_finish_indexing;
- if ( !defined $self->{searcher} ) {
- $self->{searcher}
- = Lucy::Search::IndexSearcher->new( index => $self->{path} );
- }
-
- $self->{hits} = $self->{searcher}->hits(%args);
-
- return $self->{hits}->total_hits;
-}
-
-sub next {
- my $self = shift;
- return unless defined $self->{hits};
-
- # Get the hit, bail if hits are exhausted.
- my $hit = $self->{hits}->next;
- if ( !defined $hit ) {
- undef $self->{hits};
- return;
- }
-
- return $hit;
-}
-
-sub DESTROY {
- for (shift) {
- $_->_finish_indexing;
- delete $obj_cache{ refaddr $_ };
- }
-}
-
-END {
- # Finish indexing for any objects that still exist, since, if we wait
- # until global destruction, our Indexer might no longer exist,
- # (see bug #32689)
- $_->_finish_indexing for values %obj_cache;
-}
1;
__END__
-__POD__
-
-=head1 NAME
-
-Lucy::Simple - Basic search engine.
-
-=head1 SYNOPSIS
-
-First, build an index of your documents.
-
- my $index = Lucy::Simple->new(
- path => '/path/to/index/'
- language => 'en',
- );
-
- while ( my ( $title, $content ) = each %source_docs ) {
- $index->add_doc({
- title => $title,
- content => $content,
- });
- }
-
-Later, search the index.
-
- my $total_hits = $index->search(
- query => $query_string,
- offset => 0,
- num_wanted => 10,
- );
-
- print "Total hits: $total_hits\n";
- while ( my $hit = $index->next ) {
- print "$hit->{title}\n",
- }
-
-=head1 DESCRIPTION
-
-Lucy::Simple is a stripped-down interface for the L<Apache Lucy|Lucy> search
-engine library.
-
-=head1 METHODS
-
-=head2 new
-
- my $lucy = Lucy::Simple->new(
- path => '/path/to/index/',
- language => 'en',
- );
-
-Create a Lucy::Simple object, which can be used for both indexing and
-searching. Two hash-style parameters are required.
-
-=over
-
-=item *
-
-B<path> - Where the index directory should be located. If no index is found
-at the specified location, one will be created.
-
-=item *
-
-B<language> - The language of the documents in your collection, indicated
-by a two-letter ISO code. 12 languages are supported:
-
- |-----------------------|
- | Language | ISO code |
- |-----------------------|
- | Danish | da |
- | Dutch | nl |
- | English | en |
- | Finnish | fi |
- | French | fr |
- | German | de |
- | Italian | it |
- | Norwegian | no |
- | Portuguese | pt |
- | Spanish | es |
- | Swedish | sv |
- | Russian | ru |
- |-----------------------|
-
-=back
-
-=head2 add_doc
-
- $lucy->add_doc({
- location => $url,
- title => $title,
- content => $content,
- });
-
-Add a document to the index. The document must be supplied as a hashref, with
-field names as keys and content as values.
-
-=head2 search
-
- my $total_hits = $lucy->search(
- query => $query_string, # required
- offset => 40, # default 0
- num_wanted => 20, # default 10
- );
-
-Search the index. Returns the total number of documents which match the
-query. (This number is unlikely to match C<num_wanted>.)
-
-=over
-
-=item *
-
-B<query> - A search query string.
-
-=item *
-
-B<offset> - The number of most-relevant hits to discard, typically used when
-"paging" through hits N at a time. Setting offset to 20 and num_wanted to 10
-retrieves hits 21-30, assuming that 30 hits can be found.
-
-=item *
-
-B<num_wanted> - The number of hits you would like to see after C<offset> is
-taken into account.
-
-=back
-
-=head1 BUGS
-
-Not thread-safe.
-=cut
[3/5] lucy git commit: Implement Doc_Field_Names
Posted by nw...@apache.org.
Implement Doc_Field_Names
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/f1c30213
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/f1c30213
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/f1c30213
Branch: refs/heads/master
Commit: f1c30213afc3b2d9b1838ce662a127f0004ed5d8
Parents: 78a8299
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Sat Aug 1 17:22:18 2015 +0200
Committer: Nick Wellnhofer <we...@aevum.de>
Committed: Wed Aug 5 15:26:15 2015 +0200
----------------------------------------------------------------------
c/src/Lucy/Document/Doc.c | 7 +++++++
core/Lucy/Document/Doc.cfh | 5 +++++
go/cfext/lucy.c | 7 +++++++
go/lucy/lucy.go | 12 ++++++++++++
perl/xs/Lucy/Document/Doc.c | 20 ++++++++++++++++++++
5 files changed, 51 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucy/blob/f1c30213/c/src/Lucy/Document/Doc.c
----------------------------------------------------------------------
diff --git a/c/src/Lucy/Document/Doc.c b/c/src/Lucy/Document/Doc.c
index 717059f..d0fb065 100644
--- a/c/src/Lucy/Document/Doc.c
+++ b/c/src/Lucy/Document/Doc.c
@@ -23,6 +23,7 @@
#include "Clownfish/Err.h"
#include "Clownfish/Hash.h"
#include "Clownfish/Class.h"
+#include "Clownfish/Vector.h"
#include "Lucy/Store/InStream.h"
#include "Lucy/Store/OutStream.h"
#include "Lucy/Util/Freezer.h"
@@ -85,6 +86,12 @@ Doc_Extract_IMP(Doc *self, String *field) {
return INCREF(Hash_Fetch(hash, field));
}
+Vector*
+Doc_Field_Names_IMP(Doc *self) {
+ Hash *hash = (Hash*)Doc_IVARS(self)->fields;
+ return Hash_Keys(hash);
+}
+
Hash*
Doc_Dump_IMP(Doc *self) {
UNUSED_VAR(self);
http://git-wip-us.apache.org/repos/asf/lucy/blob/f1c30213/core/Lucy/Document/Doc.cfh
----------------------------------------------------------------------
diff --git a/core/Lucy/Document/Doc.cfh b/core/Lucy/Document/Doc.cfh
index d30a6b3..c1da681 100644
--- a/core/Lucy/Document/Doc.cfh
+++ b/core/Lucy/Document/Doc.cfh
@@ -76,6 +76,11 @@ public class Lucy::Document::Doc inherits Clownfish::Obj {
nullable incremented Obj*
Extract(Doc *self, String *field);
+ /** Return a list of names of all fields present.
+ */
+ incremented Vector*
+ Field_Names(Doc *self);
+
/* Unimplemented methods.
*/
public bool
http://git-wip-us.apache.org/repos/asf/lucy/blob/f1c30213/go/cfext/lucy.c
----------------------------------------------------------------------
diff --git a/go/cfext/lucy.c b/go/cfext/lucy.c
index 9e9b840..fc2df3b 100644
--- a/go/cfext/lucy.c
+++ b/go/cfext/lucy.c
@@ -135,6 +135,13 @@ Doc_Extract_IMP(Doc *self, String *field) {
return GOLUCY_Doc_Extract_BRIDGE(self, field);
}
+Doc_Field_Names_t GOLUCY_Doc_Field_Names_BRIDGE;
+
+Vector*
+Doc_Field_Names_IMP(Doc *self) {
+ return GOLUCY_Doc_Field_Names_BRIDGE(self);
+}
+
Hash*
Doc_Dump_IMP(Doc *self) {
UNUSED_VAR(self);
http://git-wip-us.apache.org/repos/asf/lucy/blob/f1c30213/go/lucy/lucy.go
----------------------------------------------------------------------
diff --git a/go/lucy/lucy.go b/go/lucy/lucy.go
index bc2e9f8..fcddfbd 100644
--- a/go/lucy/lucy.go
+++ b/go/lucy/lucy.go
@@ -95,6 +95,10 @@ extern cfish_Obj*
GOLUCY_Doc_Extract(lucy_Doc *self, cfish_String *field);
extern cfish_Obj*
(*GOLUCY_Doc_Extract_BRIDGE)(lucy_Doc *self, cfish_String *field);
+extern cfish_Vector*
+GOLUCY_Doc_Field_Names(lucy_Doc *self);
+extern cfish_Vector*
+(*GOLUCY_Doc_Field_Names_BRIDGE)(lucy_Doc *self);
extern bool
GOLUCY_Doc_Equals(lucy_Doc *self, cfish_Obj *other);
extern bool
@@ -132,6 +136,7 @@ GOLUCY_glue_exported_symbols() {
GOLUCY_Doc_Serialize_BRIDGE = GOLUCY_Doc_Serialize;
GOLUCY_Doc_Deserialize_BRIDGE = GOLUCY_Doc_Deserialize;
GOLUCY_Doc_Extract_BRIDGE = GOLUCY_Doc_Extract;
+ GOLUCY_Doc_Field_Names_BRIDGE = GOLUCY_Doc_Field_Names;
GOLUCY_Doc_Equals_BRIDGE = GOLUCY_Doc_Equals;
GOLUCY_Doc_Destroy_BRIDGE = GOLUCY_Doc_Destroy;
GOLUCY_DefDocReader_Fetch_Doc_BRIDGE = GOLUCY_DefDocReader_Fetch_Doc;
@@ -306,6 +311,13 @@ func GOLUCY_Doc_Extract(d *C.lucy_Doc, field *C.cfish_String) *C.cfish_Obj {
return C.cfish_inc_refcount(unsafe.Pointer(val))
}
+//export GOLUCY_Doc_Field_Names
+func GOLUCY_Doc_Field_Names(d *C.lucy_Doc) *C.cfish_Vector {
+ ivars := C.lucy_Doc_IVARS(d)
+ hash := (*C.cfish_Hash)(ivars.fields)
+ return C.CFISH_Hash_Keys(hash)
+}
+
//export GOLUCY_Doc_Equals
func GOLUCY_Doc_Equals(d *C.lucy_Doc, other *C.cfish_Obj) C.bool {
twin := (*C.lucy_Doc)(unsafe.Pointer(other))
http://git-wip-us.apache.org/repos/asf/lucy/blob/f1c30213/perl/xs/Lucy/Document/Doc.c
----------------------------------------------------------------------
diff --git a/perl/xs/Lucy/Document/Doc.c b/perl/xs/Lucy/Document/Doc.c
index fff4e62..c352f4e 100644
--- a/perl/xs/Lucy/Document/Doc.c
+++ b/perl/xs/Lucy/Document/Doc.c
@@ -159,6 +159,26 @@ LUCY_Doc_Extract_IMP(lucy_Doc *self, cfish_String *field) {
return retval;
}
+cfish_Vector*
+LUCY_Doc_Field_Names_IMP(lucy_Doc *self) {
+ dTHX;
+ lucy_DocIVARS *const ivars = lucy_Doc_IVARS(self);
+
+ HV *fields = (HV*)ivars->fields;
+ I32 num_fields = hv_iterinit(fields);
+ cfish_Vector *retval = cfish_Vec_new(num_fields);
+
+ while (num_fields--) {
+ HE *entry = hv_iternext(fields);
+ STRLEN key_size;
+ const char *key = XSBind_hash_key_to_utf8(aTHX_ entry, &key_size);
+ cfish_String *key_str = cfish_Str_new_from_trusted_utf8(key, key_size);
+ CFISH_Vec_Push(retval, (cfish_Obj*)key_str);
+ }
+
+ return retval;
+}
+
cfish_Hash*
LUCY_Doc_Dump_IMP(lucy_Doc *self) {
dTHX;