You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by nw...@apache.org on 2013/09/21 18:49:50 UTC
[lucy-commits] [2/2] git commit: refs/heads/cfish-string-prep1 - Optimize writing of terms
Optimize writing of terms
* Make TextTermStepper#Write_* accept either Strings or CharBufs.
* Convert LexiconWriter#Add_Term to accept an Obj.
* Convert S_write_terms_and_postings in PostingPool to use a CharBuf
in order to avoid repeated String allocations.
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/c69fb741
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/c69fb741
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/c69fb741
Branch: refs/heads/cfish-string-prep1
Commit: c69fb741a5d016455b56de8ca3890c33f55ce464
Parents: 1d7ea09
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Sat Sep 21 15:25:32 2013 +0200
Committer: Nick Wellnhofer <we...@aevum.de>
Committed: Sat Sep 21 16:16:03 2013 +0200
----------------------------------------------------------------------
core/Lucy/Index/LexiconWriter.c | 5 +++--
core/Lucy/Index/LexiconWriter.cfh | 2 +-
core/Lucy/Index/PostingPool.c | 20 ++++++++++----------
core/Lucy/Plan/TextType.c | 34 ++++++++++++++++++++++++----------
core/Lucy/Plan/TextType.cfh | 2 +-
5 files changed, 39 insertions(+), 24 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucy/blob/c69fb741/core/Lucy/Index/LexiconWriter.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/LexiconWriter.c b/core/Lucy/Index/LexiconWriter.c
index 5189c49..4253e67 100644
--- a/core/Lucy/Index/LexiconWriter.c
+++ b/core/Lucy/Index/LexiconWriter.c
@@ -18,6 +18,7 @@
#include "Lucy/Util/ToolSet.h"
#include "Lucy/Index/LexiconWriter.h"
+#include "Clownfish/CharBuf.h"
#include "Lucy/Plan/FieldType.h"
#include "Lucy/Plan/Schema.h"
#include "Lucy/Index/PolyReader.h"
@@ -104,7 +105,7 @@ S_add_last_term_to_ix(LexiconWriter *self) {
}
void
-LexWriter_Add_Term_IMP(LexiconWriter* self, String* term_text, TermInfo* tinfo) {
+LexWriter_Add_Term_IMP(LexiconWriter* self, Obj* term_text, TermInfo* tinfo) {
LexiconWriterIVARS *const ivars = LexWriter_IVARS(self);
OutStream *dat_out = ivars->dat_out;
@@ -115,7 +116,7 @@ LexWriter_Add_Term_IMP(LexiconWriter* self, String* term_text, TermInfo* tinfo)
S_add_last_term_to_ix(self);
}
- TermStepper_Write_Delta(ivars->term_stepper, dat_out, (Obj*)term_text);
+ TermStepper_Write_Delta(ivars->term_stepper, dat_out, term_text);
TermStepper_Write_Delta(ivars->tinfo_stepper, dat_out, (Obj*)tinfo);
// Track number of terms.
http://git-wip-us.apache.org/repos/asf/lucy/blob/c69fb741/core/Lucy/Index/LexiconWriter.cfh
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/LexiconWriter.cfh b/core/Lucy/Index/LexiconWriter.cfh
index 340e271..867120c 100644
--- a/core/Lucy/Index/LexiconWriter.cfh
+++ b/core/Lucy/Index/LexiconWriter.cfh
@@ -72,7 +72,7 @@ class Lucy::Index::LexiconWriter cnick LexWriter
* field number).
*/
void
- Add_Term(LexiconWriter* self, String* term_text, TermInfo* tinfo);
+ Add_Term(LexiconWriter* self, Obj* term_text, TermInfo* tinfo);
public void
Add_Segment(LexiconWriter *self, SegReader *reader,
http://git-wip-us.apache.org/repos/asf/lucy/blob/c69fb741/core/Lucy/Index/PostingPool.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/PostingPool.c b/core/Lucy/Index/PostingPool.c
index a969fd5..a3e61c0 100644
--- a/core/Lucy/Index/PostingPool.c
+++ b/core/Lucy/Index/PostingPool.c
@@ -22,6 +22,7 @@
#include "Lucy/Util/ToolSet.h"
#include "Lucy/Index/PostingPool.h"
+#include "Clownfish/CharBuf.h"
#include "Lucy/Analysis/Inversion.h"
#include "Lucy/Plan/Architecture.h"
#include "Lucy/Plan/FieldType.h"
@@ -377,10 +378,10 @@ S_write_terms_and_postings(PostingPool *self, PostingWriter *post_writer,
(*(RawPosting**)PostPool_Fetch(self)),
RAWPOSTING);
RawPostingIVARS *post_ivars = RawPost_IVARS(posting);
- String *last_term_text
- = Str_new_from_utf8(post_ivars->blob, post_ivars->content_len);
- const char *last_text_buf = Str_Get_Ptr8(last_term_text);
- uint32_t last_text_size = Str_Get_Size(last_term_text);
+ CharBuf *last_term_text
+ = CB_new_from_trusted_utf8(post_ivars->blob, post_ivars->content_len);
+ const char *last_text_buf = CB_Get_Ptr8(last_term_text);
+ uint32_t last_text_size = CB_Get_Size(last_term_text);
SkipStepper_Set_ID_And_Filepos(skip_stepper, 0, 0);
// Initialize sentinel to be used on the last iter, using an empty string
@@ -413,7 +414,7 @@ S_write_terms_and_postings(PostingPool *self, PostingWriter *post_writer,
// If the term text changes, process the last term.
if (!same_text_as_last) {
// Hand off to LexiconWriter.
- LexWriter_Add_Term(lex_writer, last_term_text, tinfo);
+ LexWriter_Add_Term(lex_writer, (Obj*)last_term_text, tinfo);
// Start each term afresh.
TInfo_Reset(tinfo);
@@ -426,11 +427,10 @@ S_write_terms_and_postings(PostingPool *self, PostingWriter *post_writer,
last_skip_filepos = tinfo_ivars->post_filepos;
// Remember the term_text so we can write string diffs.
- DECREF(last_term_text);
- last_term_text
- = Str_new_from_utf8(post_ivars->blob, post_ivars->content_len);
- last_text_buf = Str_Get_Ptr8(last_term_text);
- last_text_size = Str_Get_Size(last_term_text);
+ CB_Mimic_Utf8(last_term_text, post_ivars->blob,
+ post_ivars->content_len);
+ last_text_buf = CB_Get_Ptr8(last_term_text);
+ last_text_size = CB_Get_Size(last_term_text);
}
// Bail on last iter before writing invalid posting data.
http://git-wip-us.apache.org/repos/asf/lucy/blob/c69fb741/core/Lucy/Plan/TextType.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Plan/TextType.c b/core/Lucy/Plan/TextType.c
index 487317c..675b51a 100644
--- a/core/Lucy/Plan/TextType.c
+++ b/core/Lucy/Plan/TextType.c
@@ -93,11 +93,12 @@ void
TextTermStepper_Write_Key_Frame_IMP(TextTermStepper *self,
OutStream *outstream, Obj *value) {
TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self);
- const char *buf = Str_Get_Ptr8((String*)value);
- size_t size = Str_Get_Size((String*)value);
+ CharBuf *charbuf = (CharBuf*)ivars->value;
+ CB_Mimic(charbuf, value);
+ const char *buf = CB_Get_Ptr8(charbuf);
+ size_t size = CB_Get_Size(charbuf);
OutStream_Write_C32(outstream, size);
OutStream_Write_Bytes(outstream, buf, size);
- Obj_Mimic(ivars->value, value);
// Invalidate string.
DECREF(ivars->string);
ivars->string = NULL;
@@ -107,12 +108,25 @@ void
TextTermStepper_Write_Delta_IMP(TextTermStepper *self, OutStream *outstream,
Obj *value) {
TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self);
- String *new_value = (String*)CERTIFY(value, STRING);
- CharBuf *last_value = (CharBuf*)ivars->value;
- const char *new_text = Str_Get_Ptr8(new_value);
- size_t new_size = Str_Get_Size(new_value);
- const char *last_text = CB_Get_Ptr8(last_value);
- size_t last_size = CB_Get_Size(last_value);
+ CharBuf *charbuf = (CharBuf*)ivars->value;
+ const char *last_text = CB_Get_Ptr8(charbuf);
+ size_t last_size = CB_Get_Size(charbuf);
+ const char *new_text = NULL;
+ size_t new_size = 0;
+
+ if (Obj_Is_A(value, STRING)) {
+ String *new_string = (String*)value;
+ new_text = Str_Get_Ptr8(new_string);
+ new_size = Str_Get_Size(new_string);
+ }
+ else if (Obj_Is_A(value, CHARBUF)) {
+ CharBuf *new_charbuf = (CharBuf*)value;
+ new_text = CB_Get_Ptr8(new_charbuf);
+ new_size = CB_Get_Size(new_charbuf);
+ }
+ else {
+ THROW(ERR, "'value' must be a String or CharBuf");
+ }
// Count how many bytes the strings share at the top.
const int32_t overlap = StrHelp_overlap(last_text, new_text,
@@ -125,7 +139,7 @@ TextTermStepper_Write_Delta_IMP(TextTermStepper *self, OutStream *outstream,
OutStream_Write_String(outstream, diff_start_str, diff_len);
// Update value.
- Obj_Mimic(ivars->value, value);
+ CB_Mimic_Utf8(charbuf, new_text, new_size);
// Invalidate string.
DECREF(ivars->string);
http://git-wip-us.apache.org/repos/asf/lucy/blob/c69fb741/core/Lucy/Plan/TextType.cfh
----------------------------------------------------------------------
diff --git a/core/Lucy/Plan/TextType.cfh b/core/Lucy/Plan/TextType.cfh
index 33b70ec..b2bf014 100644
--- a/core/Lucy/Plan/TextType.cfh
+++ b/core/Lucy/Plan/TextType.cfh
@@ -42,7 +42,7 @@ class Lucy::Index::TermStepper::TextTermStepper
Reset(TextTermStepper *self);
/**
- * @param value A String.
+ * @param value A String or CharBuf.
*/
public void
Set_Value(TextTermStepper *self, Obj *value = NULL);