You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by nw...@apache.org on 2013/03/11 22:52:48 UTC

[lucy-commits] [7/18] git commit: refs/heads/master - Start to implement Lucy methods for C bindings

Start to implement Lucy methods for C bindings


Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/e737b717
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/e737b717
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/e737b717

Branch: refs/heads/master
Commit: e737b7170f85c44081715be26869d50792299648
Parents: 4025736
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Sun Nov 25 20:39:34 2012 +0100
Committer: Nick Wellnhofer <we...@aevum.de>
Committed: Sat Mar 9 17:51:54 2013 +0100

----------------------------------------------------------------------
 c/src/Lucy/Document/Doc.c    |  110 ++++++++++++++++++++++++-------------
 c/src/Lucy/Index/DocReader.c |  101 ++++++++++++++++++++++++++++++++--
 c/src/Lucy/Index/Inverter.c  |  104 ++++++++++++++++++++++++++++++++++-
 3 files changed, 269 insertions(+), 46 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy/blob/e737b717/c/src/Lucy/Document/Doc.c
----------------------------------------------------------------------
diff --git a/c/src/Lucy/Document/Doc.c b/c/src/Lucy/Document/Doc.c
index db1d8f7..2cf88ec 100644
--- a/c/src/Lucy/Document/Doc.c
+++ b/c/src/Lucy/Document/Doc.c
@@ -15,79 +15,113 @@
  */
 
 #define C_LUCY_DOC
+#define CHY_USE_SHORT_NAMES
+#define LUCY_USE_SHORT_NAMES
 
-#include "CFBind.h"
 #include "Lucy/Document/Doc.h"
+#include "Clownfish/CharBuf.h"
+#include "Clownfish/Err.h"
+#include "Clownfish/Hash.h"
+#include "Clownfish/VTable.h"
 #include "Lucy/Store/InStream.h"
 #include "Lucy/Store/OutStream.h"
 
-lucy_Doc*
-lucy_Doc_init(lucy_Doc *self, void *fields, int32_t doc_id) {
-    THROW(LUCY_ERR, "TODO");
-    UNREACHABLE_RETURN(lucy_Doc*);
+Doc*
+Doc_init(Doc *self, void *fields, int32_t doc_id) { // Set self->fields and self->doc_id; returns self.
+    Hash *hash;
+
+    if (fields) {
+        hash = (Hash *)CERTIFY(fields, HASH); // a non-NULL `fields` is checked against HASH before use
+        INCREF(hash); // take a reference on the caller-supplied Hash
+    }
+    else {
+        hash = Hash_new(0); // no Hash supplied: start from a freshly created one
+    }
+    self->fields = hash;
+    self->doc_id = doc_id;
+
+    return self;
 }
 
 void
-lucy_Doc_set_fields(lucy_Doc *self, void *fields) {
-    THROW(LUCY_ERR, "TODO");
+Doc_set_fields(Doc *self, void *fields) { // Swap in a new fields Hash; the caller keeps its own reference.
+    Hash *incoming = (Hash *)INCREF(CERTIFY(fields, HASH)); // take our reference first, so passing the current Hash is safe
+    DECREF(self->fields); self->fields = incoming; // release the old Hash, then install the new one
 }
 
 uint32_t
-lucy_Doc_get_size(lucy_Doc *self) {
-    THROW(LUCY_ERR, "TODO");
-    UNREACHABLE_RETURN(uint32_t);
+Doc_get_size(Doc *self) { // Returns Hash_Get_Size() of the fields Hash.
+    Hash *hash = (Hash *)self->fields; // self->fields is treated as a Hash here
+    return Hash_Get_Size(hash);
 }
 
 void
-lucy_Doc_store(lucy_Doc *self, const lucy_CharBuf *field, lucy_Obj *value) {
-    THROW(LUCY_ERR, "TODO");
+Doc_store(Doc *self, const CharBuf *field, Obj *value) { // Store `value` under key `field` in the fields Hash.
+    Hash *hash = (Hash *)self->fields;
+    Hash_Store(hash, (Obj *)field, value); // the cast drops the const qualifier on `field`
+    INCREF(value); // NOTE(review): presumably balances Hash_Store taking ownership of `value` - confirm
 }
 
 void
-lucy_Doc_serialize(lucy_Doc *self, lucy_OutStream *outstream) {
-    THROW(LUCY_ERR, "TODO");
+Doc_serialize(Doc *self, OutStream *outstream) { // Write the fields Hash, then doc_id, to `outstream`.
+    Hash *hash = (Hash *)self->fields;
+    Hash_Serialize(hash, outstream);
+    OutStream_Write_C32(outstream, self->doc_id); // Doc_deserialize reads back in this same order
 }
 
-lucy_Doc*
-lucy_Doc_deserialize(lucy_Doc *self, lucy_InStream *instream) {
-    THROW(LUCY_ERR, "TODO");
-    UNREACHABLE_RETURN(lucy_Doc*);
+Doc*
+Doc_deserialize(Doc *self, InStream *instream) { // Fill fields and doc_id from `instream`; returns self.
+     Hash *hash = (Hash*)VTable_Make_Obj(HASH); // new Hash object handed to Hash_Deserialize
+     self->fields = Hash_Deserialize(hash, instream);
+     self->doc_id = InStream_Read_C32(instream); // counterpart of the Write_C32 in Doc_serialize
+     return self;
 }
 
-lucy_Obj*
-lucy_Doc_extract(lucy_Doc *self, lucy_CharBuf *field,
-                 lucy_ViewCharBuf *target) {
-    THROW(LUCY_ERR, "TODO");
-    UNREACHABLE_RETURN(lucy_Obj*);
+Obj*
+Doc_extract(Doc *self, CharBuf *field, // Look up `field` in the fields Hash; returns the Hash_Fetch result.
+                 ViewCharBuf *target) {
+    Hash *hash = (Hash *)self->fields;
+    Obj  *obj  = Hash_Fetch(hash, (Obj *)field);
+
+    if (obj && Obj_Is_A(obj, CHARBUF)) {
+        ViewCB_Assign(target, (CharBuf *)obj); // `target` is only assigned when the value is a CharBuf
+    }
+
+    return obj; // may be NULL; the `obj &&` guard above allows for that
 }
 
 void*
-lucy_Doc_to_host(lucy_Doc *self) {
-    THROW(LUCY_ERR, "TODO");
+Doc_to_host(Doc *self) { // Not implemented yet: unconditionally throws ERR with "TODO".
+    THROW(ERR, "TODO");
     UNREACHABLE_RETURN(void*);
 }
 
-lucy_Hash*
-lucy_Doc_dump(lucy_Doc *self) {
-    THROW(LUCY_ERR, "TODO");
-    UNREACHABLE_RETURN(lucy_Hash*);
+Hash*
+Doc_dump(Doc *self) { // Not implemented yet: unconditionally throws ERR with "TODO".
+    THROW(ERR, "TODO");
+    UNREACHABLE_RETURN(Hash*);
 }
 
-lucy_Doc*
-lucy_Doc_load(lucy_Doc *self, lucy_Obj *dump) {
-    THROW(LUCY_ERR, "TODO");
-    UNREACHABLE_RETURN(lucy_Doc*);
+Doc*
+Doc_load(Doc *self, Obj *dump) { // Not implemented yet: unconditionally throws ERR with "TODO".
+    THROW(ERR, "TODO");
+    UNREACHABLE_RETURN(Doc*);
 }
 
 bool
-lucy_Doc_equals(lucy_Doc *self, lucy_Obj *other) {
-    THROW(LUCY_ERR, "TODO");
-    UNREACHABLE_RETURN(bool);
+Doc_equals(Doc *self, Obj *other) { // True if `other` is this Doc, or a DOC whose fields Hash equals ours.
+    Doc *twin = (Doc*)other; // cast precedes the type check; `twin` is only dereferenced after it passes
+
+    if (twin == self)                    { return true;  }
+    if (!Obj_Is_A(other, DOC)) { return false; }
+
+    return Hash_Equals(self->fields, twin->fields); // NOTE(review): doc_id is not compared - confirm intended
 }
 
 void
-lucy_Doc_destroy(lucy_Doc *self) {
-    THROW(LUCY_ERR, "TODO");
+Doc_destroy(Doc *self) { // Release the reference held on the fields Hash, then hand off to SUPER_DESTROY.
+    DECREF(self->fields);
+    SUPER_DESTROY(self, DOC);
 }
 
 

http://git-wip-us.apache.org/repos/asf/lucy/blob/e737b717/c/src/Lucy/Index/DocReader.c
----------------------------------------------------------------------
diff --git a/c/src/Lucy/Index/DocReader.c b/c/src/Lucy/Index/DocReader.c
index 430ccbe..e06e7ee 100644
--- a/c/src/Lucy/Index/DocReader.c
+++ b/c/src/Lucy/Index/DocReader.c
@@ -16,14 +16,105 @@
 
 #define C_LUCY_DOCREADER
 #define C_LUCY_DEFAULTDOCREADER
+#define CHY_USE_SHORT_NAMES
+#define LUCY_USE_SHORT_NAMES
 
-#include "CFBind.h"
 #include "Lucy/Index/DocReader.h"
+#include "Clownfish/ByteBuf.h"
+#include "Clownfish/CharBuf.h"
+#include "Clownfish/Err.h"
+#include "Clownfish/Hash.h"
+#include "Clownfish/Num.h"
+#include "Clownfish/Util/Memory.h"
 #include "Lucy/Document/HitDoc.h"
+#include "Lucy/Plan/FieldType.h"
+#include "Lucy/Plan/Schema.h"
+#include "Lucy/Store/InStream.h"
 
-lucy_HitDoc*
-lucy_DefDocReader_fetch_doc(lucy_DefaultDocReader *self, int32_t doc_id) {
-    THROW(LUCY_ERR, "TODO");
-    UNREACHABLE_RETURN(lucy_HitDoc*);
+HitDoc*
+DefDocReader_fetch_doc(DefaultDocReader *self, int32_t doc_id) { // Decode stored fields for `doc_id` into a new HitDoc.
+    Schema   *const schema = self->schema;
+    InStream *const dat_in = self->dat_in;
+    InStream *const ix_in  = self->ix_in;
+    Hash     *const fields = Hash_new(1);
+    int64_t   start;
+    uint32_t  num_fields;
+    uint32_t  field_name_cap = 31;
+    char     *field_name = (char*)MALLOCATE(field_name_cap + 1); // scratch buffer reused for every field name
+
+    // Get data file pointer from index, read number of fields.
+    InStream_Seek(ix_in, (int64_t)doc_id * 8); // one 8-byte entry per doc_id in ix_in
+    start = InStream_Read_U64(ix_in);
+    InStream_Seek(dat_in, start);
+    num_fields = InStream_Read_C32(dat_in);
+
+    // Decode stored data and build up the doc field by field.
+    while (num_fields--) {
+        uint32_t        field_name_len;
+        Obj       *value;
+        FieldType *type;
+
+        // Read field name.
+        field_name_len = InStream_Read_C32(dat_in);
+        if (field_name_len > field_name_cap) { // grow the scratch buffer when this name does not fit
+            field_name_cap = field_name_len;
+            field_name     = (char*)REALLOCATE(field_name,
+                                                    field_name_cap + 1);
+        }
+        InStream_Read_Bytes(dat_in, field_name, field_name_len);
+
+        // Find the Field's FieldType.
+        ZombieCharBuf *field_name_zcb
+            = ZCB_WRAP_STR(field_name, field_name_len);
+        type = Schema_Fetch_Type(schema, (CharBuf*)field_name_zcb); // NOTE(review): used below without a NULL check - confirm a stored field always has a type
+
+        // Read the field value.
+        switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
+            case FType_TEXT: {
+                    uint32_t value_len = InStream_Read_C32(dat_in); // length prefix, then that many bytes
+                    char *buf = (char*)MALLOCATE(value_len + 1);
+                    InStream_Read_Bytes(dat_in, buf, value_len);
+                    buf[value_len] = '\0'; // terminate using the extra byte allocated above
+                    value = (Obj*)CB_new_steal_from_trusted_str( // presumably takes ownership of `buf` - `buf` is not freed here
+                                buf, value_len, value_len + 1);
+                    break;
+                }
+            case FType_BLOB: {
+                    uint32_t value_len = InStream_Read_C32(dat_in); // length prefix, then that many bytes
+                    char *buf = (char*)MALLOCATE(value_len);
+                    InStream_Read_Bytes(dat_in, buf, value_len);
+                    value = (Obj*)BB_new_steal_bytes( // presumably takes ownership of `buf` - `buf` is not freed here
+                                buf, value_len, value_len);
+                    break;
+                }
+            case FType_FLOAT32:
+                value = (Obj*)Float32_new(
+                                InStream_Read_F32(dat_in));
+                break;
+            case FType_FLOAT64:
+                value = (Obj*)Float64_new(
+                                InStream_Read_F64(dat_in));
+                break;
+            case FType_INT32:
+                value = (Obj*)Int32_new(
+                                (int32_t)InStream_Read_C32(dat_in)); // read as C32, then cast to signed
+                break;
+            case FType_INT64:
+                value = (Obj*)Int64_new(
+                                (int64_t)InStream_Read_C64(dat_in)); // read as C64, then cast to signed
+                break;
+            default:
+                value = NULL;
+                THROW(ERR, "Unrecognized type: %o", type); // NOTE(review): `field_name` and `fields` are not released on this path - confirm acceptable
+        }
+
+        // Store the value.
+        Hash_Store_Str(fields, field_name, field_name_len, value);
+    }
+    FREEMEM(field_name);
+
+    HitDoc *retval = HitDoc_new(fields, doc_id, 0.0); // third argument is fixed at 0.0 here
+    DECREF(fields); // drop this function's own reference to the Hash
+    return retval;
 }
 

http://git-wip-us.apache.org/repos/asf/lucy/blob/e737b717/c/src/Lucy/Index/Inverter.c
----------------------------------------------------------------------
diff --git a/c/src/Lucy/Index/Inverter.c b/c/src/Lucy/Index/Inverter.c
index e77c067..cfaafcd 100644
--- a/c/src/Lucy/Index/Inverter.c
+++ b/c/src/Lucy/Index/Inverter.c
@@ -16,14 +16,112 @@
 
 #define C_LUCY_INVERTER
 #define C_LUCY_INVERTERENTRY
+#define CHY_USE_SHORT_NAMES
+#define LUCY_USE_SHORT_NAMES
 
-#include "CFBind.h"
 #include "Lucy/Index/Inverter.h"
+#include "Clownfish/ByteBuf.h"
+#include "Clownfish/CharBuf.h"
+#include "Clownfish/Err.h"
+#include "Clownfish/Hash.h"
+#include "Clownfish/Num.h"
+#include "Clownfish/VArray.h"
 #include "Lucy/Document/Doc.h"
+#include "Lucy/Index/Segment.h"
+#include "Lucy/Plan/FieldType.h"
+#include "Lucy/Plan/Schema.h"
+
+static InverterEntry*
+S_fetch_entry(Inverter *self, CharBuf *field) { // Return the pooled InverterEntry for `field`, creating it on first use.
+    Schema *const schema = self->schema;
+    int32_t field_num = Seg_Field_Num(self->segment, field);
+    if (!field_num) { // a field number of 0 is treated as "not in the segment"
+        // This field seems not to be in the segment yet.  Try to find it in
+        // the Schema.
+        if (Schema_Fetch_Type(schema, field)) {
+            // The field is in the Schema.  Get a field num from the Segment.
+            field_num = Seg_Add_Field(self->segment, field);
+        }
+        else {
+            // We've truly failed to find the field.  The user must
+            // not have spec'd it.
+            THROW(ERR, "Unknown field name: '%o'", field);
+        }
+    }
+
+    InverterEntry *entry
+        = (InverterEntry*)VA_Fetch(self->entry_pool, field_num); // entry_pool is indexed by field number
+    if (!entry) {
+        entry = InvEntry_new(schema, (CharBuf*)field, field_num);
+        VA_Store(self->entry_pool, field_num, (Obj*)entry); // keep the new entry in the pool for later lookups
+    }
+    return entry;
+}
 
 void
-lucy_Inverter_invert_doc(lucy_Inverter *self, lucy_Doc *doc) {
-    THROW(LUCY_ERR, "TODO");
+Inverter_invert_doc(Inverter *self, Doc *doc) { // Copy each field value of `doc` into its InverterEntry and add it to the Inverter.
+    Hash *const fields = (Hash*)Doc_Get_Fields(doc);
+    uint32_t   num_keys     = Hash_Iterate(fields); // return value is used as the number of Hash_Next calls below
+
+    // Prepare for the new doc.
+    Inverter_Set_Doc(self, doc);
+
+    // Extract and invert the doc's fields.
+    while (num_keys--) {
+        Obj *key, *obj;
+        Hash_Next(fields, &key, &obj);
+        CharBuf *field = (CharBuf*)CERTIFY(key, CHARBUF);
+        InverterEntry *inv_entry = S_fetch_entry(self, field); // throws for a field unknown to both segment and schema
+        FieldType *type = inv_entry->type;
+
+        // Get the field value.
+        switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
+            case FType_TEXT: {
+                    CharBuf *char_buf
+                        = (CharBuf*)CERTIFY(obj, CHARBUF); // the value must be a CharBuf for a TEXT field
+                    ViewCharBuf *value
+                        = (ViewCharBuf*)inv_entry->value;
+                    ViewCB_Assign(value, char_buf);
+                    break;
+                }
+            case FType_BLOB: {
+                    ByteBuf *byte_buf
+                        = (ByteBuf*)CERTIFY(obj, BYTEBUF); // the value must be a ByteBuf for a BLOB field
+                    ViewByteBuf *value
+                        = (ViewByteBuf*)inv_entry->value;
+                    ViewBB_Assign(value, byte_buf);
+                    break;
+                }
+            case FType_INT32: {
+                    int32_t int_val = (int32_t)Obj_To_I64(obj); // narrowing cast from the 64-bit conversion result
+                    Integer32* value = (Integer32*)inv_entry->value;
+                    Int32_Set_Value(value, int_val);
+                    break;
+                }
+            case FType_INT64: {
+                    int64_t int_val = Obj_To_I64(obj);
+                    Integer64* value = (Integer64*)inv_entry->value;
+                    Int64_Set_Value(value, int_val);
+                    break;
+                }
+            case FType_FLOAT32: {
+                    float float_val = (float)Obj_To_F64(obj); // narrowing cast from the double conversion result
+                    Float32* value = (Float32*)inv_entry->value;
+                    Float32_Set_Value(value, float_val);
+                    break;
+                }
+            case FType_FLOAT64: {
+                    double float_val = Obj_To_F64(obj);
+                    Float64* value = (Float64*)inv_entry->value;
+                    Float64_Set_Value(value, float_val);
+                    break;
+                }
+            default:
+                THROW(ERR, "Unrecognized type: %o", type);
+        }
+
+        Inverter_Add_Field(self, inv_entry); // done once per field, after its value has been set
+    }
 }