You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by nw...@apache.org on 2015/08/06 18:22:17 UTC

[1/5] lucy git commit: Switch over to XSBind_hash_key_to_utf8

Repository: lucy
Updated Branches:
  refs/heads/master 199561eaf -> 7c09f4df5


Switch over to XSBind_hash_key_to_utf8


Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/78a82999
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/78a82999
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/78a82999

Branch: refs/heads/master
Commit: 78a82999f9e60c78bb470e99eda82e5446ae4e84
Parents: 35b4c52
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Sat Aug 1 17:19:53 2015 +0200
Committer: Nick Wellnhofer <we...@aevum.de>
Committed: Tue Aug 4 21:57:40 2015 +0200

----------------------------------------------------------------------
 perl/xs/Lucy/Index/Inverter.c | 21 +++------------------
 1 file changed, 3 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy/blob/78a82999/perl/xs/Lucy/Index/Inverter.c
----------------------------------------------------------------------
diff --git a/perl/xs/Lucy/Index/Inverter.c b/perl/xs/Lucy/Index/Inverter.c
index dfd436d..eed3a66 100644
--- a/perl/xs/Lucy/Index/Inverter.c
+++ b/perl/xs/Lucy/Index/Inverter.c
@@ -32,25 +32,10 @@ static lucy_InverterEntry*
 S_fetch_entry(pTHX_ lucy_Inverter *self, HE *hash_entry) {
     lucy_InverterIVARS *const ivars = lucy_Inverter_IVARS(self);
     lucy_Schema *const schema = ivars->schema;
-    char *key;
-    STRLEN key_len;
-    STRLEN he_key_len = HeKLEN(hash_entry);
+    STRLEN key_size;
+    const char *key = XSBind_hash_key_to_utf8(aTHX_ hash_entry, &key_size);
 
-    // Force field name to UTF-8 if necessary.
-    if (he_key_len == (STRLEN)HEf_SVKEY) {
-        SV *key_sv = HeKEY_sv(hash_entry);
-        key = SvPVutf8(key_sv, key_len);
-    }
-    else {
-        key = HeKEY(hash_entry);
-        key_len = he_key_len;
-        if (!cfish_StrHelp_utf8_valid(key, key_len)) {
-            SV *key_sv = HeSVKEY_force(hash_entry);
-            key = SvPVutf8(key_sv, key_len);
-        }
-    }
-
-    cfish_String *field = CFISH_SSTR_WRAP_UTF8(key, key_len);
+    cfish_String *field = CFISH_SSTR_WRAP_UTF8(key, key_size);
     int32_t field_num = LUCY_Seg_Field_Num(ivars->segment, field);
     if (!field_num) {
         // This field seems not to be in the segment yet.  Try to find it in


[2/5] lucy git commit: Fix hv_fetch with UTF-8 keys

Posted by nw...@apache.org.
Fix hv_fetch with UTF-8 keys


Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/35b4c52e
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/35b4c52e
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/35b4c52e

Branch: refs/heads/master
Commit: 35b4c52ebaf789920276154a150615896e8edc47
Parents: 199561e
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Sat Aug 1 17:19:13 2015 +0200
Committer: Nick Wellnhofer <we...@aevum.de>
Committed: Tue Aug 4 21:57:40 2015 +0200

----------------------------------------------------------------------
 perl/xs/Lucy/Document/Doc.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy/blob/35b4c52e/perl/xs/Lucy/Document/Doc.c
----------------------------------------------------------------------
diff --git a/perl/xs/Lucy/Document/Doc.c b/perl/xs/Lucy/Document/Doc.c
index 2f54da7..fff4e62 100644
--- a/perl/xs/Lucy/Document/Doc.c
+++ b/perl/xs/Lucy/Document/Doc.c
@@ -150,7 +150,7 @@ LUCY_Doc_Extract_IMP(lucy_Doc *self, cfish_String *field) {
     lucy_DocIVARS *const ivars = lucy_Doc_IVARS(self);
     cfish_Obj *retval = NULL;
     SV **sv_ptr = hv_fetch((HV*)ivars->fields, CFISH_Str_Get_Ptr8(field),
-                           CFISH_Str_Get_Size(field), 0);
+                           -CFISH_Str_Get_Size(field), 0);
 
     if (sv_ptr) {
         retval = XSBind_perl_to_cfish(aTHX_ *sv_ptr);
@@ -218,8 +218,20 @@ LUCY_Doc_Equals_IMP(lucy_Doc *self, cfish_Obj *other) {
     while (num_fields--) {
         HE *my_entry = hv_iternext(my_fields);
         SV *my_val_sv = HeVAL(my_entry);
-        STRLEN key_len = HeKLEN(my_entry);
-        char *key = HeKEY(my_entry);
+        STRLEN key_len;
+        char *key;
+
+        if (HeKLEN(my_entry) == HEf_SVKEY) {
+            SV *key_sv = HeKEY_sv(my_entry);
+            key = SvPV(key_sv, key_len);
+            if (SvUTF8(key_sv)) { key_len = -key_len; }
+        }
+        else {
+            key_len = HeKLEN(my_entry);
+            key = key_len ? HeKEY(my_entry) : Nullch;
+            if (HeKUTF8(my_entry)) { key_len = -key_len; }
+        }
+
         SV **const other_val = hv_fetch(other_fields, key, key_len, 0);
         if (!other_val) { return false; }
         if (!sv_eq(my_val_sv, *other_val)) { return false; }


[5/5] lucy git commit: Port Lucy::Simple tests to C

Posted by nw...@apache.org.
Port Lucy::Simple tests to C


Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/7c09f4df
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/7c09f4df
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/7c09f4df

Branch: refs/heads/master
Commit: 7c09f4df572acc3949f9a8c409a00fa3876467aa
Parents: ebde55f
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Sat Aug 1 18:12:34 2015 +0200
Committer: Nick Wellnhofer <we...@aevum.de>
Committed: Wed Aug 5 15:26:15 2015 +0200

----------------------------------------------------------------------
 core/Lucy/Test.c              |   2 +
 core/Lucy/Test/TestSimple.c   | 112 +++++++++++++++++++++++++++++++++++++
 core/Lucy/Test/TestSimple.cfh |  29 ++++++++++
 perl/t/core/308-simple.t      |  23 ++++++++
 4 files changed, 166 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy/blob/7c09f4df/core/Lucy/Test.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Test.c b/core/Lucy/Test.c
index 3046494..0edda50 100644
--- a/core/Lucy/Test.c
+++ b/core/Lucy/Test.c
@@ -77,6 +77,7 @@
 #include "Lucy/Test/Store/TestRAMFileHandle.h"
 #include "Lucy/Test/Store/TestRAMFolder.h"
 #include "Lucy/Test/TestSchema.h"
+#include "Lucy/Test/TestSimple.h"
 #include "Lucy/Test/Util/TestFreezer.h"
 #include "Lucy/Test/Util/TestIndexFileNames.h"
 #include "Lucy/Test/Util/TestJson.h"
@@ -136,6 +137,7 @@ Test_create_test_suite() {
     TestSuite_Add_Batch(suite, (TestBatch*)TestFType_new());
     TestSuite_Add_Batch(suite, (TestBatch*)TestSeg_new());
     TestSuite_Add_Batch(suite, (TestBatch*)TestHighlighter_new());
+    TestSuite_Add_Batch(suite, (TestBatch*)TestSimple_new());
     TestSuite_Add_Batch(suite, (TestBatch*)TestSpan_new());
     TestSuite_Add_Batch(suite, (TestBatch*)TestHeatMap_new());
     TestSuite_Add_Batch(suite, (TestBatch*)TestTermQuery_new());

http://git-wip-us.apache.org/repos/asf/lucy/blob/7c09f4df/core/Lucy/Test/TestSimple.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Test/TestSimple.c b/core/Lucy/Test/TestSimple.c
new file mode 100644
index 0000000..6e8adfd
--- /dev/null
+++ b/core/Lucy/Test/TestSimple.c
@@ -0,0 +1,112 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define C_TESTLUCY_TESTSIMPLE
+#define TESTLUCY_USE_SHORT_NAMES
+#include "Lucy/Util/ToolSet.h"
+
+#include "Lucy/Test/TestSimple.h"
+#include "Lucy/Simple.h"
+
+#include "Clownfish/TestHarness/TestBatchRunner.h"
+#include "Lucy/Document/Doc.h"
+#include "Lucy/Document/HitDoc.h"
+#include "Lucy/Store/RAMFolder.h"
+
+TestSimple*
+TestSimple_new() {
+    return (TestSimple*)Class_Make_Obj(TESTSIMPLE);
+}
+
+static void
+test_simple(TestBatchRunner *runner) {
+    RAMFolder *folder   = RAMFolder_new(NULL);
+    String    *language = SSTR_WRAP_UTF8("en", 2);
+    Simple    *lucy     = Simple_new((Obj*)folder, language);
+
+    String *food_field = SSTR_WRAP_UTF8("food", 4);
+
+    {
+        Doc *doc = Doc_new(NULL, 0);
+        String *value = SSTR_WRAP_UTF8("creamed corn", 12);
+        Doc_Store(doc, food_field, (Obj*)value);
+        Simple_Add_Doc(lucy, doc);
+        DECREF(doc);
+
+        String *query = SSTR_WRAP_UTF8("creamed", 7);
+        uint32_t num_results = Simple_Search(lucy, query, 0, 10);
+        TEST_INT_EQ(runner, num_results, 1, "Search works right after add");
+    }
+
+    {
+        Doc *doc = Doc_new(NULL, 0);
+        String *value = SSTR_WRAP_UTF8("creamed spinach", 15);
+        Doc_Store(doc, food_field, (Obj*)value);
+        Simple_Add_Doc(lucy, doc);
+        DECREF(doc);
+
+        String *query = SSTR_WRAP_UTF8("creamed", 7);
+        uint32_t num_results = Simple_Search(lucy, query, 0, 10);
+        TEST_INT_EQ(runner, num_results, 2, "Search returns total hits");
+    }
+
+    {
+        Doc *doc = Doc_new(NULL, 0);
+        String *value = SSTR_WRAP_UTF8("creamed broccoli", 16);
+        Doc_Store(doc, food_field, (Obj*)value);
+        Simple_Add_Doc(lucy, doc);
+        DECREF(doc);
+
+        DECREF(lucy);
+        lucy = Simple_new((Obj*)folder, language);
+
+        String *query = SSTR_WRAP_UTF8("cream", 5);
+        uint32_t num_results = Simple_Search(lucy, query, 0, 10);
+        TEST_INT_EQ(runner, num_results, 3, "commit upon destroy");
+
+        HitDoc *hit;
+        while ((hit = Simple_Next(lucy)) != NULL) {
+            String *food = (String*)HitDoc_Extract(hit, food_field);
+            TEST_TRUE(runner, Str_Starts_With_Utf8(food, "cream", 5), "Next");
+            DECREF(food);
+            DECREF(hit);
+        }
+    }
+
+    {
+        Doc *doc = Doc_new(NULL, 0);
+        String *band_field = SSTR_WRAP_UTF8("band", 4);
+        String *value = SSTR_WRAP_UTF8("Cream", 5);
+        Doc_Store(doc, band_field, (Obj*)value);
+        Simple_Add_Doc(lucy, doc);
+        DECREF(doc);
+
+        String *query = SSTR_WRAP_UTF8("cream", 5);
+        uint32_t num_results = Simple_Search(lucy, query, 0, 10);
+        TEST_INT_EQ(runner, num_results, 4,
+                    "Search uses correct EasyAnalyzer");
+    }
+
+    DECREF(lucy);
+    DECREF(folder);
+}
+
+void
+TestSimple_Run_IMP(TestSimple *self, TestBatchRunner *runner) {
+    TestBatchRunner_Plan(runner, (TestBatch*)self, 7);
+    test_simple(runner);
+}
+

http://git-wip-us.apache.org/repos/asf/lucy/blob/7c09f4df/core/Lucy/Test/TestSimple.cfh
----------------------------------------------------------------------
diff --git a/core/Lucy/Test/TestSimple.cfh b/core/Lucy/Test/TestSimple.cfh
new file mode 100644
index 0000000..045fe8c
--- /dev/null
+++ b/core/Lucy/Test/TestSimple.cfh
@@ -0,0 +1,29 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+parcel TestLucy;
+
+class Lucy::Test::TestSimple
+    inherits Clownfish::TestHarness::TestBatch {
+
+    inert incremented TestSimple*
+    new();
+
+    void
+    Run(TestSimple *self, TestBatchRunner *runner);
+}
+
+

http://git-wip-us.apache.org/repos/asf/lucy/blob/7c09f4df/perl/t/core/308-simple.t
----------------------------------------------------------------------
diff --git a/perl/t/core/308-simple.t b/perl/t/core/308-simple.t
new file mode 100644
index 0000000..5a558a1
--- /dev/null
+++ b/perl/t/core/308-simple.t
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use strict;
+use warnings;
+
+use Lucy::Test;
+my $success = Lucy::Test::run_tests("Lucy::Test::TestSimple");
+
+exit($success ? 0 : 1);
+


[4/5] lucy git commit: Port Lucy::Simple to C

Posted by nw...@apache.org.
Port Lucy::Simple to C


Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/ebde55f3
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/ebde55f3
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/ebde55f3

Branch: refs/heads/master
Commit: ebde55f3716e3f746095ce957054b1eecf936a4d
Parents: f1c3021
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Sat Aug 1 03:43:00 2015 +0200
Committer: Nick Wellnhofer <we...@aevum.de>
Committed: Wed Aug 5 15:26:15 2015 +0200

----------------------------------------------------------------------
 core/Lucy/Simple.c                       | 190 +++++++++++++++++++
 core/Lucy/Simple.cfh                     |  99 ++++++++++
 perl/buildlib/Lucy/Build/Binding/Misc.pm | 107 +++++++++++
 perl/lib/Lucy/Simple.pm                  | 261 --------------------------
 4 files changed, 396 insertions(+), 261 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy/blob/ebde55f3/core/Lucy/Simple.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Simple.c b/core/Lucy/Simple.c
new file mode 100644
index 0000000..2271984
--- /dev/null
+++ b/core/Lucy/Simple.c
@@ -0,0 +1,190 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define CFISH_USE_SHORT_NAMES
+#define LUCY_USE_SHORT_NAMES
+
+#define C_LUCY_SIMPLE
+#include "Lucy/Simple.h"
+
+#include "Clownfish/Err.h"
+#include "Clownfish/Hash.h"
+#include "Clownfish/HashIterator.h"
+#include "Clownfish/String.h"
+#include "Clownfish/Vector.h"
+#include "Lucy/Analysis/EasyAnalyzer.h"
+#include "Lucy/Document/Doc.h"
+#include "Lucy/Document/HitDoc.h"
+#include "Lucy/Index/Indexer.h"
+#include "Lucy/Index/PolyReader.h"
+#include "Lucy/Plan/FullTextType.h"
+#include "Lucy/Plan/Schema.h"
+#include "Lucy/Search/Hits.h"
+#include "Lucy/Search/IndexSearcher.h"
+
+Simple*
+Simple_new(Obj *index, String *language) {
+    Simple *self = (Simple*)Class_Make_Obj(SIMPLE);
+    return Simple_init(self, index, language);
+}
+
+Simple*
+Simple_init(Simple *self, Obj *index, String *language) {
+    SimpleIVARS *const ivars = Simple_IVARS(self);
+    ivars->index    = INCREF(index);
+    ivars->language = Str_Clone(language);
+    return self;
+}
+
+void
+Simple_Destroy_IMP(Simple *self) {
+    SimpleIVARS *const ivars = Simple_IVARS(self);
+
+    Simple_Finish_Indexing(self);
+
+    DECREF(ivars->index);
+    DECREF(ivars->language);
+    DECREF(ivars->schema);
+    DECREF(ivars->type);
+    DECREF(ivars->indexer);
+    DECREF(ivars->searcher);
+    DECREF(ivars->hits);
+
+    SUPER_DESTROY(self, SIMPLE);
+}
+
+static void
+S_create_indexer(Simple *self) {
+    SimpleIVARS *const ivars = Simple_IVARS(self);
+
+    // Trigger searcher refresh.
+    DECREF(ivars->searcher);
+    DECREF(ivars->hits);
+    ivars->searcher = NULL;
+    ivars->hits     = NULL;
+
+    // Get type and schema
+    Schema     *schema      = NULL;
+    FieldType  *type        = NULL;
+    PolyReader *reader      = PolyReader_open(ivars->index, NULL, NULL);
+    Vector     *seg_readers = PolyReader_Get_Seg_Readers(reader);
+
+    if (Vec_Get_Size(seg_readers) == 0) {
+        // Index is empty, create new schema and type.
+        schema = Schema_new();
+        EasyAnalyzer *analyzer = EasyAnalyzer_new(ivars->language);
+        type = (FieldType*)FullTextType_new((Analyzer*)analyzer);
+        DECREF(analyzer);
+    }
+    else {
+        // Get schema from reader.
+        schema = (Schema*)INCREF(PolyReader_Get_Schema(reader));
+        Vector *fields = Schema_All_Fields(schema);
+        String *field  = (String*)CERTIFY(Vec_Fetch(fields, 0), STRING);
+        type = (FieldType*)INCREF(Schema_Fetch_Type(schema, field));
+        DECREF(fields);
+    }
+
+    ivars->indexer = Indexer_new(schema, ivars->index, NULL, 0);
+    ivars->schema  = schema;
+    ivars->type    = type;
+
+    DECREF(reader);
+}
+
+void
+Simple_Add_Doc_IMP(Simple *self, Doc *doc) {
+    SimpleIVARS *const ivars = Simple_IVARS(self);
+
+    if (!ivars->indexer) {
+        S_create_indexer(self);
+    }
+
+    Vector *field_names = Doc_Field_Names(doc);
+
+    for (size_t i = 0, max = Vec_Get_Size(field_names); i < max; i++) {
+        String *field = (String*)Vec_Fetch(field_names, i);
+        Schema_Spec_Field(ivars->schema, field, ivars->type);
+    }
+
+    Indexer_Add_Doc(ivars->indexer, doc, 1.0);
+
+    DECREF(field_names);
+}
+
+uint32_t
+Simple_Search_IMP(Simple *self, String *query, uint32_t offset,
+                  uint32_t num_wanted) {
+    SimpleIVARS *const ivars = Simple_IVARS(self);
+
+    // Flush recent adds; lazily create searcher.
+    Simple_Finish_Indexing(self);
+    if (!ivars->searcher) {
+        ivars->searcher = IxSearcher_new(ivars->index);
+    }
+
+    DECREF(ivars->hits);
+    ivars->hits = IxSearcher_Hits(ivars->searcher, (Obj*)query, offset,
+                                  num_wanted, NULL);
+
+    return Hits_Total_Hits(ivars->hits);
+}
+
+HitDoc*
+Simple_Next_IMP(Simple *self) {
+    SimpleIVARS *const ivars = Simple_IVARS(self);
+
+    if (!ivars->hits) { return NULL; }
+
+    // Get the hit, bail if hits are exhausted.
+    HitDoc *doc = Hits_Next(ivars->hits);
+    if (!doc) {
+        DECREF(ivars->hits);
+        ivars->hits = NULL;
+    }
+
+    return doc;
+}
+
+Indexer*
+Simple_Get_Indexer_IMP(Simple *self) {
+    SimpleIVARS *const ivars = Simple_IVARS(self);
+
+    if (!ivars->indexer) {
+        S_create_indexer(self);
+    }
+
+    return ivars->indexer;
+}
+
+void
+Simple_Finish_Indexing_IMP(Simple *self) {
+    SimpleIVARS *const ivars = Simple_IVARS(self);
+
+    // Don't bother to throw an error if index not modified.
+    if (ivars->indexer) {
+        Indexer_Commit(ivars->indexer);
+
+        // Trigger searcher and indexer refresh.
+        DECREF(ivars->schema);
+        DECREF(ivars->type);
+        DECREF(ivars->indexer);
+        ivars->schema   = NULL;
+        ivars->type     = NULL;
+        ivars->indexer  = NULL;
+    }
+}
+

http://git-wip-us.apache.org/repos/asf/lucy/blob/ebde55f3/core/Lucy/Simple.cfh
----------------------------------------------------------------------
diff --git a/core/Lucy/Simple.cfh b/core/Lucy/Simple.cfh
new file mode 100644
index 0000000..3680ce5
--- /dev/null
+++ b/core/Lucy/Simple.cfh
@@ -0,0 +1,99 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+parcel Lucy;
+
+/** Basic search engine.
+ *
+ * Lucy::Simple is a stripped-down interface for the Apache Lucy search
+ * engine library.
+ */
+public class Lucy::Simple {
+
+    Obj           *index;
+    String        *language;
+    Schema        *schema;
+    FieldType     *type;
+    Indexer       *indexer;
+    IndexSearcher *searcher;
+    Hits          *hits;
+
+    /** Create a Lucy::Simple object, which can be used for both indexing and
+     * searching.  Both parameters `path` and `language` are required.
+     *
+     * @param path Where the index directory should be located.  If no index
+     * is found at the specified location, one will be created.
+     * @param language The language of the documents in your collection,
+     * indicated by a two-letter ISO code.  12 languages are supported:
+     *
+     *     |-----------------------|
+     *     | Language   | ISO code |
+     *     |-----------------------|
+     *     | Danish     | da       |
+     *     | Dutch      | nl       |
+     *     | English    | en       |
+     *     | Finnish    | fi       |
+     *     | French     | fr       |
+     *     | German     | de       |
+     *     | Italian    | it       |
+     *     | Norwegian  | no       |
+     *     | Portuguese | pt       |
+     *     | Spanish    | es       |
+     *     | Swedish    | sv       |
+     *     | Russian    | ru       |
+     *     |-----------------------|
+     */
+    public inert Simple*
+    new(Obj *path, String *language);
+
+    public inert Simple*
+    init(Simple *self, Obj *path, String *language);
+
+    /** Add a document to the index.
+     */
+    public void
+    Add_Doc(Simple *self, Doc *doc);
+
+    /** Search the index.  Returns the total number of documents which match
+     * the query.  (This number is unlikely to match `num_wanted`.)
+     *
+     * @param query A search query string.
+     * @param offset The number of most-relevant hits to discard, typically
+     * used when "paging" through hits N at a time.  Setting offset to 20 and
+     * num_wanted to 10 retrieves hits 21-30, assuming that 30 hits can be
+     * found.
+     * @param num_wanted The number of hits you would like to see after
+     * `offset` is taken into account.
+     */
+    public uint32_t
+    Search(Simple *self, String *query, uint32_t offset = 0,
+           uint32_t num_wanted = 10);
+
+    /** Return the next hit, or [](cfish:@null) when the iterator is exhausted.
+     */
+    public incremented nullable HitDoc*
+    Next(Simple *self);
+
+    Indexer*
+    Get_Indexer(Simple *self);
+
+    void
+    Finish_Indexing(Simple *self);
+
+    public void
+    Destroy(Simple *self);
+}
+

http://git-wip-us.apache.org/repos/asf/lucy/blob/ebde55f3/perl/buildlib/Lucy/Build/Binding/Misc.pm
----------------------------------------------------------------------
diff --git a/perl/buildlib/Lucy/Build/Binding/Misc.pm b/perl/buildlib/Lucy/Build/Binding/Misc.pm
index ebd4fea..92c8b95 100644
--- a/perl/buildlib/Lucy/Build/Binding/Misc.pm
+++ b/perl/buildlib/Lucy/Build/Binding/Misc.pm
@@ -26,6 +26,7 @@ sub bind_all {
     $hierarchy->inherit_metadata;
 
     $class->bind_lucy;
+    $class->bind_simple;
     $class->bind_test;
 }
 
@@ -148,6 +149,112 @@ END_XS_CODE
     Clownfish::CFC::Binding::Perl::Class->register($binding);
 }
 
+sub bind_simple {
+    my @exposed = qw(
+        Search
+        Next
+    );
+    my @hand_rolled = qw( Add_Doc );
+
+    my $pod_spec = Clownfish::CFC::Binding::Perl::Pod->new;
+    my $synopsis = <<'END_SYNOPSIS';
+First, build an index of your documents.
+
+    my $index = Lucy::Simple->new(
+        path     => '/path/to/index/',
+        language => 'en',
+    );
+
+    while ( my ( $title, $content ) = each %source_docs ) {
+        $index->add_doc({
+            title    => $title,
+            content  => $content,
+        });
+    }
+
+Later, search the index.
+
+    my $total_hits = $index->search(
+        query      => $query_string,
+        offset     => 0,
+        num_wanted => 10,
+    );
+
+    print "Total hits: $total_hits\n";
+    while ( my $hit = $index->next ) {
+        print "$hit->{title}\n";
+    }
+END_SYNOPSIS
+    my $add_doc_pod = <<'END_ADD_DOC_POD';
+=head2 add_doc
+
+    $lucy->add_doc({
+        location => $url,
+        title    => $title,
+        content  => $content,
+    });
+
+Add a document to the index. The document must be supplied as a hashref,
+with field names as keys and content as values.
+
+END_ADD_DOC_POD
+    $pod_spec->set_synopsis($synopsis);
+
+    # Override is necessary because there's no standard way to explain
+    # hash/hashref across multiple host languages.
+    $pod_spec->add_method(
+        method => 'Add_Doc',
+        alias  => 'add_doc',
+        pod    => $add_doc_pod,
+    );
+    $pod_spec->add_method( method => $_, alias => lc($_) ) for @exposed;
+
+    my $xs_code = <<'END_XS_CODE';
+MODULE = Lucy  PACKAGE = Lucy::Simple
+
+void
+add_doc(self, doc_sv)
+    lucy_Simple *self;
+    SV *doc_sv;
+PPCODE:
+{
+    lucy_Doc *doc = NULL;
+
+    // Either get a Doc or use the stock doc.
+    if (sv_isobject(doc_sv)
+        && sv_derived_from(doc_sv, "Lucy::Document::Doc")
+       ) {
+        IV tmp = SvIV(SvRV(doc_sv));
+        doc = INT2PTR(lucy_Doc*, tmp);
+    }
+    else if (XSBind_sv_defined(aTHX_ doc_sv) && SvROK(doc_sv)) {
+        HV *maybe_fields = (HV*)SvRV(doc_sv);
+        if (SvTYPE((SV*)maybe_fields) == SVt_PVHV) {
+            lucy_Indexer *indexer = LUCY_Simple_Get_Indexer(self);
+            doc = LUCY_Indexer_Get_Stock_Doc(indexer);
+            LUCY_Doc_Set_Fields(doc, maybe_fields);
+        }
+    }
+    if (!doc) {
+        THROW(CFISH_ERR, "Need either a hashref or a %o",
+              CFISH_Class_Get_Name(LUCY_DOC));
+    }
+
+    LUCY_Simple_Add_Doc(self, doc);
+}
+END_XS_CODE
+
+    my $binding = Clownfish::CFC::Binding::Perl::Class->new(
+        parcel     => "Lucy",
+        class_name => "Lucy::Simple",
+    );
+    $binding->exclude_method($_) for @hand_rolled;
+    $binding->append_xs($xs_code);
+    $binding->set_pod_spec($pod_spec);
+
+    Clownfish::CFC::Binding::Perl::Class->register($binding);
+}
+
 sub bind_test {
     my $xs_code = <<'END_XS_CODE';
 MODULE = Lucy   PACKAGE = Lucy::Test

http://git-wip-us.apache.org/repos/asf/lucy/blob/ebde55f3/perl/lib/Lucy/Simple.pm
----------------------------------------------------------------------
diff --git a/perl/lib/Lucy/Simple.pm b/perl/lib/Lucy/Simple.pm
index e409615..e56a6ba 100644
--- a/perl/lib/Lucy/Simple.pm
+++ b/perl/lib/Lucy/Simple.pm
@@ -13,274 +13,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-use strict;
-use warnings;
-
 package Lucy::Simple;
 use Lucy;
 our $VERSION = '0.004000';
 $VERSION = eval $VERSION;
-use Carp;
-use Scalar::Util qw( weaken reftype refaddr );
-
-use Lucy::Plan::Schema;
-use Lucy::Analysis::EasyAnalyzer;
-use Lucy::Index::Indexer;
-use Lucy::Search::IndexSearcher;
-
-my %obj_cache;
-
-sub new {
-    my ( $either, %args ) = @_;
-    my $path     = delete $args{path};
-    my $language = lc( delete $args{language} );
-    confess("Missing required parameter 'path'") unless defined $path;
-    confess("Invalid language: '$language'")
-        unless $language =~ /^(?:da|de|en|es|fi|fr|it|nl|no|pt|ru|sv)$/;
-    my @remaining = keys %args;
-    confess("Invalid params: @remaining") if @remaining;
-    my $self = bless {
-        type     => undef,
-        schema   => undef,
-        indexer  => undef,
-        searcher => undef,
-        hits     => undef,
-        language => $language,
-        path     => $path,
-        },
-        ref($either) || $either;
-
-    # Cache the object for later clean-up.
-    weaken( $obj_cache{ refaddr $self } = $self );
-
-    return $self;
-}
-
-sub _lazily_create_indexer {
-    my $self = shift;
-    if ( !defined $self->{indexer} ) {
-        # Get type and schema
-        my $schema;
-        my $reader = Lucy::Index::PolyReader->open( index => $self->{path} );
-        if ( !@{ $reader->seg_readers } ) {
-            # index is empty, create new schema and type
-            $schema = Lucy::Plan::Schema->new;
-            my $analyzer = Lucy::Analysis::EasyAnalyzer->new(
-                language => $self->{language}, );
-            $self->{type}
-                = Lucy::Plan::FullTextType->new( analyzer => $analyzer, );
-        }
-        else {
-            # get schema from reader
-            $schema = $reader->get_schema;
-            my $field = $schema->all_fields->[0];
-            $self->{type} = $schema->fetch_type($field);
-        }
-        $self->{schema}  = $schema;
-        $self->{indexer} = Lucy::Index::Indexer->new(
-            schema => $schema,
-            index  => $self->{path},
-        );
-    }
-}
-
-sub add_doc {
-    my ( $self, $hashref ) = @_;
-    croak("add_doc requires exactly one argument: a hashref")
-        unless ( @_ == 2 and reftype($hashref) eq 'HASH' );
-    $self->_lazily_create_indexer;
-    my $schema = $self->{schema};
-    my $type   = $self->{type};
-    $schema->spec_field( name => $_, type => $type ) for keys %$hashref;
-    $self->{indexer}->add_doc($hashref);
-}
-
-sub _finish_indexing {
-    my $self = shift;
-
-    # Don't bother to throw an error if index not modified.
-    if ( defined $self->{indexer} ) {
-        $self->{indexer}->commit;
-
-        # Trigger searcher and indexer refresh.
-        undef $self->{indexer};
-        undef $self->{searcher};
-    }
-}
-
-sub search {
-    my ( $self, %args ) = @_;
-
-    # Flush recent adds; lazily create searcher.
-    $self->_finish_indexing;
-    if ( !defined $self->{searcher} ) {
-        $self->{searcher}
-            = Lucy::Search::IndexSearcher->new( index => $self->{path} );
-    }
-
-    $self->{hits} = $self->{searcher}->hits(%args);
-
-    return $self->{hits}->total_hits;
-}
-
-sub next {
-    my $self = shift;
-    return unless defined $self->{hits};
-
-    # Get the hit, bail if hits are exhausted.
-    my $hit = $self->{hits}->next;
-    if ( !defined $hit ) {
-        undef $self->{hits};
-        return;
-    }
-
-    return $hit;
-}
-
-sub DESTROY {
-    for (shift) {
-        $_->_finish_indexing;
-        delete $obj_cache{ refaddr $_ };
-    }
-}
-
-END {
-    # Finish indexing for any objects that still exist, since, if we wait
-    # until global destruction, our Indexer might no longer exist,
-    # (see bug #32689)
-    $_->_finish_indexing for values %obj_cache;
-}
 
 1;
 
 __END__
 
-__POD__
-
-=head1 NAME
-
-Lucy::Simple - Basic search engine.
-
-=head1 SYNOPSIS
-
-First, build an index of your documents.
-
-    my $index = Lucy::Simple->new(
-        path     => '/path/to/index/'
-        language => 'en',
-    );
-
-    while ( my ( $title, $content ) = each %source_docs ) {
-        $index->add_doc({
-            title    => $title,
-            content  => $content,
-        });
-    }
-
-Later, search the index.
-
-    my $total_hits = $index->search( 
-        query      => $query_string,
-        offset     => 0,
-        num_wanted => 10,
-    );
-
-    print "Total hits: $total_hits\n";
-    while ( my $hit = $index->next ) {
-        print "$hit->{title}\n",
-    }
-
-=head1 DESCRIPTION
-
-Lucy::Simple is a stripped-down interface for the L<Apache Lucy|Lucy> search
-engine library.  
-
-=head1 METHODS 
-
-=head2 new
-
-    my $lucy = Lucy::Simple->new(
-        path     => '/path/to/index/',
-        language => 'en',
-    );
-
-Create a Lucy::Simple object, which can be used for both indexing and
-searching.  Two hash-style parameters are required.
-
-=over 
-
-=item *
-
-B<path> - Where the index directory should be located.  If no index is found
-at the specified location, one will be created.
-
-=item *
-
-B<language> - The language of the documents in your collection, indicated 
-by a two-letter ISO code.  12 languages are supported:
-
-    |-----------------------|
-    | Language   | ISO code |
-    |-----------------------|
-    | Danish     | da       |
-    | Dutch      | nl       |
-    | English    | en       |
-    | Finnish    | fi       |
-    | French     | fr       |
-    | German     | de       |
-    | Italian    | it       |
-    | Norwegian  | no       |
-    | Portuguese | pt       |
-    | Spanish    | es       |
-    | Swedish    | sv       |
-    | Russian    | ru       |
-    |-----------------------|
-
-=back
-
-=head2 add_doc 
-
-    $lucy->add_doc({
-        location => $url,
-        title    => $title,
-        content  => $content,
-    });
-
-Add a document to the index.  The document must be supplied as a hashref, with
-field names as keys and content as values.
-
-=head2 search
-
-    my $total_hits = $lucy->search( 
-        query      => $query_string,    # required
-        offset     => 40,               # default 0
-        num_wanted => 20,               # default 10
-    );
-
-Search the index.  Returns the total number of documents which match the
-query.  (This number is unlikely to match C<num_wanted>.)
-
-=over
-
-=item *
-
-B<query> - A search query string.
-
-=item *
-
-B<offset> - The number of most-relevant hits to discard, typically used when
-"paging" through hits N at a time.  Setting offset to 20 and num_wanted to 10
-retrieves hits 21-30, assuming that 30 hits can be found.
-
-=item *
-
-B<num_wanted> - The number of hits you would like to see after C<offset> is
-taken into account.  
-
-=back
-
-=head1 BUGS
-
-Not thread-safe.
 
-=cut


[3/5] lucy git commit: Implement Doc_Field_Names

Posted by nw...@apache.org.
Implement Doc_Field_Names


Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/f1c30213
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/f1c30213
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/f1c30213

Branch: refs/heads/master
Commit: f1c30213afc3b2d9b1838ce662a127f0004ed5d8
Parents: 78a8299
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Sat Aug 1 17:22:18 2015 +0200
Committer: Nick Wellnhofer <we...@aevum.de>
Committed: Wed Aug 5 15:26:15 2015 +0200

----------------------------------------------------------------------
 c/src/Lucy/Document/Doc.c   |  7 +++++++
 core/Lucy/Document/Doc.cfh  |  5 +++++
 go/cfext/lucy.c             |  7 +++++++
 go/lucy/lucy.go             | 12 ++++++++++++
 perl/xs/Lucy/Document/Doc.c | 20 ++++++++++++++++++++
 5 files changed, 51 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy/blob/f1c30213/c/src/Lucy/Document/Doc.c
----------------------------------------------------------------------
diff --git a/c/src/Lucy/Document/Doc.c b/c/src/Lucy/Document/Doc.c
index 717059f..d0fb065 100644
--- a/c/src/Lucy/Document/Doc.c
+++ b/c/src/Lucy/Document/Doc.c
@@ -23,6 +23,7 @@
 #include "Clownfish/Err.h"
 #include "Clownfish/Hash.h"
 #include "Clownfish/Class.h"
+#include "Clownfish/Vector.h"
 #include "Lucy/Store/InStream.h"
 #include "Lucy/Store/OutStream.h"
 #include "Lucy/Util/Freezer.h"
@@ -85,6 +86,12 @@ Doc_Extract_IMP(Doc *self, String *field) {
     return INCREF(Hash_Fetch(hash, field));
 }
 
+// Return the keys of the Doc's fields Hash, i.e. the names of all fields.
+Vector*
+Doc_Field_Names_IMP(Doc *self) {
+    return Hash_Keys((Hash*)Doc_IVARS(self)->fields);
+}
+
 Hash*
 Doc_Dump_IMP(Doc *self) {
     UNUSED_VAR(self);

http://git-wip-us.apache.org/repos/asf/lucy/blob/f1c30213/core/Lucy/Document/Doc.cfh
----------------------------------------------------------------------
diff --git a/core/Lucy/Document/Doc.cfh b/core/Lucy/Document/Doc.cfh
index d30a6b3..c1da681 100644
--- a/core/Lucy/Document/Doc.cfh
+++ b/core/Lucy/Document/Doc.cfh
@@ -76,6 +76,11 @@ public class Lucy::Document::Doc inherits Clownfish::Obj {
     nullable incremented Obj*
     Extract(Doc *self, String *field);
 
+    /** Return a list of names of all fields present.
+     */
+    incremented Vector*
+    Field_Names(Doc *self);
+
     /* Unimplemented methods.
      */
     public bool

http://git-wip-us.apache.org/repos/asf/lucy/blob/f1c30213/go/cfext/lucy.c
----------------------------------------------------------------------
diff --git a/go/cfext/lucy.c b/go/cfext/lucy.c
index 9e9b840..fc2df3b 100644
--- a/go/cfext/lucy.c
+++ b/go/cfext/lucy.c
@@ -135,6 +135,13 @@ Doc_Extract_IMP(Doc *self, String *field) {
     return GOLUCY_Doc_Extract_BRIDGE(self, field);
 }
 
+Doc_Field_Names_t GOLUCY_Doc_Field_Names_BRIDGE; // Set by the Go glue code.
+
+Vector*
+Doc_Field_Names_IMP(Doc *self) {
+    return GOLUCY_Doc_Field_Names_BRIDGE(self); // Delegate via the bridge.
+}
+
 Hash*
 Doc_Dump_IMP(Doc *self) {
     UNUSED_VAR(self);

http://git-wip-us.apache.org/repos/asf/lucy/blob/f1c30213/go/lucy/lucy.go
----------------------------------------------------------------------
diff --git a/go/lucy/lucy.go b/go/lucy/lucy.go
index bc2e9f8..fcddfbd 100644
--- a/go/lucy/lucy.go
+++ b/go/lucy/lucy.go
@@ -95,6 +95,10 @@ extern cfish_Obj*
 GOLUCY_Doc_Extract(lucy_Doc *self, cfish_String *field);
 extern cfish_Obj*
 (*GOLUCY_Doc_Extract_BRIDGE)(lucy_Doc *self, cfish_String *field);
+extern cfish_Vector*
+GOLUCY_Doc_Field_Names(lucy_Doc *self);
+extern cfish_Vector*
+(*GOLUCY_Doc_Field_Names_BRIDGE)(lucy_Doc *self);
 extern bool
 GOLUCY_Doc_Equals(lucy_Doc *self, cfish_Obj *other);
 extern bool
@@ -132,6 +136,7 @@ GOLUCY_glue_exported_symbols() {
 	GOLUCY_Doc_Serialize_BRIDGE = GOLUCY_Doc_Serialize;
 	GOLUCY_Doc_Deserialize_BRIDGE = GOLUCY_Doc_Deserialize;
 	GOLUCY_Doc_Extract_BRIDGE = GOLUCY_Doc_Extract;
+	GOLUCY_Doc_Field_Names_BRIDGE = GOLUCY_Doc_Field_Names;
 	GOLUCY_Doc_Equals_BRIDGE = GOLUCY_Doc_Equals;
 	GOLUCY_Doc_Destroy_BRIDGE = GOLUCY_Doc_Destroy;
 	GOLUCY_DefDocReader_Fetch_Doc_BRIDGE = GOLUCY_DefDocReader_Fetch_Doc;
@@ -306,6 +311,13 @@ func GOLUCY_Doc_Extract(d *C.lucy_Doc, field *C.cfish_String) *C.cfish_Obj {
 	return C.cfish_inc_refcount(unsafe.Pointer(val))
 }
 
+// GOLUCY_Doc_Field_Names returns the keys of the Doc's fields Hash.
+//export GOLUCY_Doc_Field_Names
+func GOLUCY_Doc_Field_Names(d *C.lucy_Doc) *C.cfish_Vector {
+	fields := C.lucy_Doc_IVARS(d).fields
+	return C.CFISH_Hash_Keys((*C.cfish_Hash)(fields))
+}
+
 //export GOLUCY_Doc_Equals
 func GOLUCY_Doc_Equals(d *C.lucy_Doc, other *C.cfish_Obj) C.bool {
 	twin := (*C.lucy_Doc)(unsafe.Pointer(other))

http://git-wip-us.apache.org/repos/asf/lucy/blob/f1c30213/perl/xs/Lucy/Document/Doc.c
----------------------------------------------------------------------
diff --git a/perl/xs/Lucy/Document/Doc.c b/perl/xs/Lucy/Document/Doc.c
index fff4e62..c352f4e 100644
--- a/perl/xs/Lucy/Document/Doc.c
+++ b/perl/xs/Lucy/Document/Doc.c
@@ -159,6 +159,26 @@ LUCY_Doc_Extract_IMP(lucy_Doc *self, cfish_String *field) {
     return retval;
 }
 
+// Return a new Vector holding one String per key of the Doc's Perl fields hash.
+cfish_Vector*
+LUCY_Doc_Field_Names_IMP(lucy_Doc *self) {
+    dTHX;
+    HV           *fields     = (HV*)lucy_Doc_IVARS(self)->fields;
+    I32           num_fields = hv_iterinit(fields);
+    cfish_Vector *retval     = cfish_Vec_new(num_fields);
+    HE           *entry;
+
+    // hv_iterinit's count is only a capacity hint (not meaningful for tied
+    // hashes), so run until hv_iternext is exhausted rather than counting down.
+    while ((entry = hv_iternext(fields)) != NULL) {
+        STRLEN key_size;
+        const char *key = XSBind_hash_key_to_utf8(aTHX_ entry, &key_size);
+        cfish_String *key_str = cfish_Str_new_from_trusted_utf8(key, key_size);
+        CFISH_Vec_Push(retval, (cfish_Obj*)key_str);
+    }
+    return retval;
+}
+
 cfish_Hash*
 LUCY_Doc_Dump_IMP(lucy_Doc *self) {
     dTHX;