You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by nw...@apache.org on 2013/03/11 22:52:48 UTC
[lucy-commits] [7/18] git commit: refs/heads/master - Start to implement Lucy
methods for C bindings
Start to implement Lucy methods for C bindings
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/e737b717
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/e737b717
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/e737b717
Branch: refs/heads/master
Commit: e737b7170f85c44081715be26869d50792299648
Parents: 4025736
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Sun Nov 25 20:39:34 2012 +0100
Committer: Nick Wellnhofer <we...@aevum.de>
Committed: Sat Mar 9 17:51:54 2013 +0100
----------------------------------------------------------------------
c/src/Lucy/Document/Doc.c | 110 ++++++++++++++++++++++++-------------
c/src/Lucy/Index/DocReader.c | 101 ++++++++++++++++++++++++++++++++--
c/src/Lucy/Index/Inverter.c | 104 ++++++++++++++++++++++++++++++++++-
3 files changed, 269 insertions(+), 46 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucy/blob/e737b717/c/src/Lucy/Document/Doc.c
----------------------------------------------------------------------
diff --git a/c/src/Lucy/Document/Doc.c b/c/src/Lucy/Document/Doc.c
index db1d8f7..2cf88ec 100644
--- a/c/src/Lucy/Document/Doc.c
+++ b/c/src/Lucy/Document/Doc.c
@@ -15,79 +15,113 @@
*/
#define C_LUCY_DOC
+#define CHY_USE_SHORT_NAMES
+#define LUCY_USE_SHORT_NAMES
-#include "CFBind.h"
#include "Lucy/Document/Doc.h"
+#include "Clownfish/CharBuf.h"
+#include "Clownfish/Err.h"
+#include "Clownfish/Hash.h"
+#include "Clownfish/VTable.h"
#include "Lucy/Store/InStream.h"
#include "Lucy/Store/OutStream.h"
-lucy_Doc*
-lucy_Doc_init(lucy_Doc *self, void *fields, int32_t doc_id) {
- THROW(LUCY_ERR, "TODO");
- UNREACHABLE_RETURN(lucy_Doc*);
+// Initialize a Doc with a field hash and a document id.
+//
+// `fields` may be NULL, in which case a new Hash is created with
+// Hash_new(0); otherwise it is run through CERTIFY against HASH and
+// INCREF'd before being stored. Returns `self`.
+Doc*
+Doc_init(Doc *self, void *fields, int32_t doc_id) {
+ Hash *hash;
+
+ if (fields) {
+ // Caller-supplied hash: validate it, then INCREF it for this Doc.
+ hash = (Hash *)CERTIFY(fields, HASH);
+ INCREF(hash);
+ }
+ else {
+ // No hash supplied: create a fresh one.
+ hash = Hash_new(0);
+ }
+ self->fields = hash;
+ self->doc_id = doc_id;
+
+ return self;
}
+// Replace the Doc's field hash with `fields`.
+//
+// `fields` must pass CERTIFY against HASH. The Doc takes its own
+// reference on the new hash, matching Doc_init (which INCREFs a
+// caller-supplied hash) and Doc_destroy (which DECREFs self->fields).
void
-lucy_Doc_set_fields(lucy_Doc *self, void *fields) {
- THROW(LUCY_ERR, "TODO");
+Doc_set_fields(Doc *self, void *fields) {
+    // Validate and retain the new hash *before* releasing the old one:
+    // a failed CERTIFY must not leave self->fields pointing at a released
+    // hash, and passing the Doc's current hash must not release it first.
+    Hash *hash = (Hash *)CERTIFY(fields, HASH);
+    INCREF(hash);
+    DECREF(self->fields);
+    self->fields = hash;
}
+// Return the size reported by Hash_Get_Size for the Doc's field hash.
uint32_t
-lucy_Doc_get_size(lucy_Doc *self) {
- THROW(LUCY_ERR, "TODO");
- UNREACHABLE_RETURN(uint32_t);
+Doc_get_size(Doc *self) {
+ Hash *hash = (Hash *)self->fields;
+ return Hash_Get_Size(hash);
}
+// Store `value` in the Doc's field hash under the key `field`.
+//
+// The caller keeps its own reference to `value`; an extra reference is
+// taken here on behalf of the hash.
void
-lucy_Doc_store(lucy_Doc *self, const lucy_CharBuf *field, lucy_Obj *value) {
- THROW(LUCY_ERR, "TODO");
+Doc_store(Doc *self, const CharBuf *field, Obj *value) {
+    Hash *hash = (Hash *)self->fields;
+    // Take the extra reference before storing, so that re-storing an
+    // object already held under this key cannot drop its last reference
+    // while the store replaces the old entry.
+    INCREF(value);
+    Hash_Store(hash, (Obj *)field, value);
}
+// Write the Doc to `outstream`: first the field hash via Hash_Serialize,
+// then the doc id via OutStream_Write_C32. Doc_deserialize reads the two
+// parts back in the same order.
void
-lucy_Doc_serialize(lucy_Doc *self, lucy_OutStream *outstream) {
- THROW(LUCY_ERR, "TODO");
+Doc_serialize(Doc *self, OutStream *outstream) {
+ Hash *hash = (Hash *)self->fields;
+ Hash_Serialize(hash, outstream);
+ OutStream_Write_C32(outstream, self->doc_id);
}
-lucy_Doc*
-lucy_Doc_deserialize(lucy_Doc *self, lucy_InStream *instream) {
- THROW(LUCY_ERR, "TODO");
- UNREACHABLE_RETURN(lucy_Doc*);
+// Populate `self` from `instream` and return it.
+//
+// Reads the field hash first and the doc id second, mirroring the write
+// order used by Doc_serialize.
+Doc*
+Doc_deserialize(Doc *self, InStream *instream) {
+ // Make a blank Hash object for Hash_Deserialize to fill in.
+ Hash *hash = (Hash*)VTable_Make_Obj(HASH);
+ self->fields = Hash_Deserialize(hash, instream);
+ self->doc_id = InStream_Read_C32(instream);
+ return self;
}
-lucy_Obj*
-lucy_Doc_extract(lucy_Doc *self, lucy_CharBuf *field,
- lucy_ViewCharBuf *target) {
- THROW(LUCY_ERR, "TODO");
- UNREACHABLE_RETURN(lucy_Obj*);
+// Look up `field` in the Doc's field hash.
+//
+// If a value is found and it is a CharBuf, `target` is additionally
+// assigned from it via ViewCB_Assign. Returns whatever Hash_Fetch
+// returned, which may be NULL; no INCREF is performed on it here.
+Obj*
+Doc_extract(Doc *self, CharBuf *field,
+ ViewCharBuf *target) {
+ Hash *hash = (Hash *)self->fields;
+ Obj *obj = Hash_Fetch(hash, (Obj *)field);
+
+ // Only CharBuf values are mirrored into `target`.
+ if (obj && Obj_Is_A(obj, CHARBUF)) {
+ ViewCB_Assign(target, (CharBuf *)obj);
+ }
+
+ return obj;
}
+// Not implemented yet in the C bindings: always throws ERR with the
+// message "TODO".
void*
-lucy_Doc_to_host(lucy_Doc *self) {
- THROW(LUCY_ERR, "TODO");
+Doc_to_host(Doc *self) {
+ THROW(ERR, "TODO");
UNREACHABLE_RETURN(void*);
}
-lucy_Hash*
-lucy_Doc_dump(lucy_Doc *self) {
- THROW(LUCY_ERR, "TODO");
- UNREACHABLE_RETURN(lucy_Hash*);
+// Not implemented yet in the C bindings: always throws ERR with the
+// message "TODO".
+Hash*
+Doc_dump(Doc *self) {
+ THROW(ERR, "TODO");
+ UNREACHABLE_RETURN(Hash*);
}
-lucy_Doc*
-lucy_Doc_load(lucy_Doc *self, lucy_Obj *dump) {
- THROW(LUCY_ERR, "TODO");
- UNREACHABLE_RETURN(lucy_Doc*);
+// Not implemented yet in the C bindings: always throws ERR with the
+// message "TODO".
+Doc*
+Doc_load(Doc *self, Obj *dump) {
+ THROW(ERR, "TODO");
+ UNREACHABLE_RETURN(Doc*);
}
+// Compare this Doc with `other`.
+//
+// Returns true when `other` is the same object, false when `other` is not
+// a DOC, and otherwise the result of Hash_Equals on the two field hashes.
+// NOTE(review): doc_id is not compared, although Doc_serialize persists
+// it -- confirm that two Docs with equal fields but different ids are
+// meant to compare equal.
bool
-lucy_Doc_equals(lucy_Doc *self, lucy_Obj *other) {
- THROW(LUCY_ERR, "TODO");
- UNREACHABLE_RETURN(bool);
+Doc_equals(Doc *self, Obj *other) {
+ Doc *twin = (Doc*)other;
+
+ if (twin == self) { return true; }
+ // `twin` is only dereferenced after this type check has passed.
+ if (!Obj_Is_A(other, DOC)) { return false; }
+
+ return Hash_Equals(self->fields, twin->fields);
}
+// Destructor: DECREF the field hash, then invoke the superclass
+// destructor via SUPER_DESTROY.
void
-lucy_Doc_destroy(lucy_Doc *self) {
- THROW(LUCY_ERR, "TODO");
+Doc_destroy(Doc *self) {
+ DECREF(self->fields);
+ SUPER_DESTROY(self, DOC);
}
http://git-wip-us.apache.org/repos/asf/lucy/blob/e737b717/c/src/Lucy/Index/DocReader.c
----------------------------------------------------------------------
diff --git a/c/src/Lucy/Index/DocReader.c b/c/src/Lucy/Index/DocReader.c
index 430ccbe..e06e7ee 100644
--- a/c/src/Lucy/Index/DocReader.c
+++ b/c/src/Lucy/Index/DocReader.c
@@ -16,14 +16,105 @@
#define C_LUCY_DOCREADER
#define C_LUCY_DEFAULTDOCREADER
+#define CHY_USE_SHORT_NAMES
+#define LUCY_USE_SHORT_NAMES
-#include "CFBind.h"
#include "Lucy/Index/DocReader.h"
+#include "Clownfish/ByteBuf.h"
+#include "Clownfish/CharBuf.h"
+#include "Clownfish/Err.h"
+#include "Clownfish/Hash.h"
+#include "Clownfish/Num.h"
+#include "Clownfish/Util/Memory.h"
#include "Lucy/Document/HitDoc.h"
+#include "Lucy/Plan/FieldType.h"
+#include "Lucy/Plan/Schema.h"
+#include "Lucy/Store/InStream.h"
-lucy_HitDoc*
-lucy_DefDocReader_fetch_doc(lucy_DefaultDocReader *self, int32_t doc_id) {
- THROW(LUCY_ERR, "TODO");
- UNREACHABLE_RETURN(lucy_HitDoc*);
+// Read the stored fields of document `doc_id` and return them as a new
+// HitDoc with a score of 0.0.
+//
+// The index stream (ix_in) is read at offset doc_id * 8 to obtain a
+// 64-bit start position in the data stream (dat_in). At that position
+// the data stream holds a field count followed, for each field, by a
+// length-prefixed field name and a value whose encoding depends on the
+// field's primitive type as declared in the Schema.
+//
+// Throws ERR if a stored field name has no type in the Schema or if the
+// type's primitive id is not one of the handled cases.
+HitDoc*
+DefDocReader_fetch_doc(DefaultDocReader *self, int32_t doc_id) {
+    Schema *const schema = self->schema;
+    InStream *const dat_in = self->dat_in;
+    InStream *const ix_in = self->ix_in;
+    Hash *const fields = Hash_new(1);
+    int64_t start;
+    uint32_t num_fields;
+    // Scratch buffer for field names; grown on demand inside the loop.
+    uint32_t field_name_cap = 31;
+    char *field_name = (char*)MALLOCATE(field_name_cap + 1);
+
+    // Get data file pointer from index, read number of fields.
+    InStream_Seek(ix_in, (int64_t)doc_id * 8);
+    start = InStream_Read_U64(ix_in);
+    InStream_Seek(dat_in, start);
+    num_fields = InStream_Read_C32(dat_in);
+
+    // Decode stored data and build up the doc field by field.
+    while (num_fields--) {
+        uint32_t field_name_len;
+        Obj *value;
+        FieldType *type;
+
+        // Read field name.
+        field_name_len = InStream_Read_C32(dat_in);
+        if (field_name_len > field_name_cap) {
+            field_name_cap = field_name_len;
+            field_name = (char*)REALLOCATE(field_name,
+                                           field_name_cap + 1);
+        }
+        InStream_Read_Bytes(dat_in, field_name, field_name_len);
+
+        // Find the Field's FieldType.
+        ZombieCharBuf *field_name_zcb
+            = ZCB_WRAP_STR(field_name, field_name_len);
+        type = Schema_Fetch_Type(schema, (CharBuf*)field_name_zcb);
+        if (type == NULL) {
+            // The Schema has no type for this stored field name. Fail
+            // explicitly rather than invoking FType_Primitive_ID on NULL.
+            THROW(ERR, "Unknown field name: '%o'", field_name_zcb);
+        }
+
+        // Read the field value.
+        switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
+            case FType_TEXT: {
+                    // Length-prefixed bytes; the buffer is NUL-terminated
+                    // and handed over to the new CharBuf.
+                    uint32_t value_len = InStream_Read_C32(dat_in);
+                    char *buf = (char*)MALLOCATE(value_len + 1);
+                    InStream_Read_Bytes(dat_in, buf, value_len);
+                    buf[value_len] = '\0';
+                    value = (Obj*)CB_new_steal_from_trusted_str(
+                                buf, value_len, value_len + 1);
+                    break;
+                }
+            case FType_BLOB: {
+                    // Length-prefixed bytes handed over to the new ByteBuf.
+                    uint32_t value_len = InStream_Read_C32(dat_in);
+                    char *buf = (char*)MALLOCATE(value_len);
+                    InStream_Read_Bytes(dat_in, buf, value_len);
+                    value = (Obj*)BB_new_steal_bytes(
+                                buf, value_len, value_len);
+                    break;
+                }
+            case FType_FLOAT32:
+                value = (Obj*)Float32_new(
+                            InStream_Read_F32(dat_in));
+                break;
+            case FType_FLOAT64:
+                value = (Obj*)Float64_new(
+                            InStream_Read_F64(dat_in));
+                break;
+            case FType_INT32:
+                value = (Obj*)Int32_new(
+                            (int32_t)InStream_Read_C32(dat_in));
+                break;
+            case FType_INT64:
+                value = (Obj*)Int64_new(
+                            (int64_t)InStream_Read_C64(dat_in));
+                break;
+            default:
+                // `value` is assigned only so it is never read
+                // uninitialized.
+                value = NULL;
+                THROW(ERR, "Unrecognized type: %o", type);
+        }
+
+        // Store the value.
+        Hash_Store_Str(fields, field_name, field_name_len, value);
+    }
+    FREEMEM(field_name);
+
+    // Drop the local reference to the hash once the HitDoc is built.
+    HitDoc *retval = HitDoc_new(fields, doc_id, 0.0);
+    DECREF(fields);
+    return retval;
}
http://git-wip-us.apache.org/repos/asf/lucy/blob/e737b717/c/src/Lucy/Index/Inverter.c
----------------------------------------------------------------------
diff --git a/c/src/Lucy/Index/Inverter.c b/c/src/Lucy/Index/Inverter.c
index e77c067..cfaafcd 100644
--- a/c/src/Lucy/Index/Inverter.c
+++ b/c/src/Lucy/Index/Inverter.c
@@ -16,14 +16,112 @@
#define C_LUCY_INVERTER
#define C_LUCY_INVERTERENTRY
+#define CHY_USE_SHORT_NAMES
+#define LUCY_USE_SHORT_NAMES
-#include "CFBind.h"
#include "Lucy/Index/Inverter.h"
+#include "Clownfish/ByteBuf.h"
+#include "Clownfish/CharBuf.h"
+#include "Clownfish/Err.h"
+#include "Clownfish/Hash.h"
+#include "Clownfish/Num.h"
+#include "Clownfish/VArray.h"
#include "Lucy/Document/Doc.h"
+#include "Lucy/Index/Segment.h"
+#include "Lucy/Plan/FieldType.h"
+#include "Lucy/Plan/Schema.h"
+
+// Return the InverterEntry for `field`, creating it on first use.
+//
+// A field number of 0 from Seg_Field_Num is treated as "not in the
+// segment yet". In that case the field is added to the segment if the
+// Schema has a type for it; otherwise ERR is thrown. Entries are kept in
+// self->entry_pool, indexed by field number.
+static InverterEntry*
+S_fetch_entry(Inverter *self, CharBuf *field) {
+ Schema *const schema = self->schema;
+ int32_t field_num = Seg_Field_Num(self->segment, field);
+ if (!field_num) {
+ // This field seems not to be in the segment yet. Try to find it in
+ // the Schema.
+ if (Schema_Fetch_Type(schema, field)) {
+ // The field is in the Schema. Get a field num from the Segment.
+ field_num = Seg_Add_Field(self->segment, field);
+ }
+ else {
+ // We've truly failed to find the field. The user must
+ // not have spec'd it.
+ THROW(ERR, "Unknown field name: '%o'", field);
+ }
+ }
+
+ // Reuse the pooled entry for this field number, or create and pool one.
+ InverterEntry *entry
+ = (InverterEntry*)VA_Fetch(self->entry_pool, field_num);
+ if (!entry) {
+ entry = InvEntry_new(schema, (CharBuf*)field, field_num);
+ VA_Store(self->entry_pool, field_num, (Obj*)entry);
+ }
+ return entry;
+}
+// Feed every field of `doc` into the Inverter.
+//
+// For each key/value pair in the doc's field hash, the matching
+// InverterEntry is fetched (see S_fetch_entry), the value is copied into
+// the entry's value object according to the field's primitive type, and
+// the entry is passed to Inverter_Add_Field. Throws ERR for a primitive
+// type that is not handled below.
void
-lucy_Inverter_invert_doc(lucy_Inverter *self, lucy_Doc *doc) {
- THROW(LUCY_ERR, "TODO");
+Inverter_invert_doc(Inverter *self, Doc *doc) {
+ Hash *const fields = (Hash*)Doc_Get_Fields(doc);
+ // Start iteration; the return value is used as the number of entries.
+ uint32_t num_keys = Hash_Iterate(fields);
+
+ // Prepare for the new doc.
+ Inverter_Set_Doc(self, doc);
+
+ // Extract and invert the doc's fields.
+ while (num_keys--) {
+ Obj *key, *obj;
+ Hash_Next(fields, &key, &obj);
+ CharBuf *field = (CharBuf*)CERTIFY(key, CHARBUF);
+ InverterEntry *inv_entry = S_fetch_entry(self, field);
+ FieldType *type = inv_entry->type;
+
+ // Get the field value.
+ switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
+ case FType_TEXT: {
+ CharBuf *char_buf
+ = (CharBuf*)CERTIFY(obj, CHARBUF);
+ ViewCharBuf *value
+ = (ViewCharBuf*)inv_entry->value;
+ ViewCB_Assign(value, char_buf);
+ break;
+ }
+ case FType_BLOB: {
+ ByteBuf *byte_buf
+ = (ByteBuf*)CERTIFY(obj, BYTEBUF);
+ ViewByteBuf *value
+ = (ViewByteBuf*)inv_entry->value;
+ ViewBB_Assign(value, byte_buf);
+ break;
+ }
+ case FType_INT32: {
+ // The 64-bit result of Obj_To_I64 is narrowed to int32_t.
+ int32_t int_val = (int32_t)Obj_To_I64(obj);
+ Integer32* value = (Integer32*)inv_entry->value;
+ Int32_Set_Value(value, int_val);
+ break;
+ }
+ case FType_INT64: {
+ int64_t int_val = Obj_To_I64(obj);
+ Integer64* value = (Integer64*)inv_entry->value;
+ Int64_Set_Value(value, int_val);
+ break;
+ }
+ case FType_FLOAT32: {
+ // The double result of Obj_To_F64 is narrowed to float.
+ float float_val = (float)Obj_To_F64(obj);
+ Float32* value = (Float32*)inv_entry->value;
+ Float32_Set_Value(value, float_val);
+ break;
+ }
+ case FType_FLOAT64: {
+ double float_val = Obj_To_F64(obj);
+ Float64* value = (Float64*)inv_entry->value;
+ Float64_Set_Value(value, float_val);
+ break;
+ }
+ default:
+ THROW(ERR, "Unrecognized type: %o", type);
+ }
+
+ Inverter_Add_Field(self, inv_entry);
+ }
}