You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by nw...@apache.org on 2013/09/21 18:49:49 UTC

[lucy-commits] [1/2] git commit: refs/heads/cfish-string-prep1 - Copy substring of wrapped strings

Updated Branches:
  refs/heads/cfish-string-prep1 eb309582b -> c69fb741a


Copy substring of wrapped strings


Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/1d7ea094
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/1d7ea094
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/1d7ea094

Branch: refs/heads/cfish-string-prep1
Commit: 1d7ea094256ace6ae85cdd859d9297e8acb9b019
Parents: eb30958
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Wed Sep 18 20:43:48 2013 +0200
Committer: Nick Wellnhofer <we...@aevum.de>
Committed: Sat Sep 21 01:58:00 2013 +0200

----------------------------------------------------------------------
 clownfish/runtime/core/Clownfish/String.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy/blob/1d7ea094/clownfish/runtime/core/Clownfish/String.c
----------------------------------------------------------------------
diff --git a/clownfish/runtime/core/Clownfish/String.c b/clownfish/runtime/core/Clownfish/String.c
index 8747ab3..ab4d34e 100644
--- a/clownfish/runtime/core/Clownfish/String.c
+++ b/clownfish/runtime/core/Clownfish/String.c
@@ -156,11 +156,19 @@ Str_newf(const char *pattern, ...) {
 }
 
 static String*
-S_new_substring(String *origin, size_t byte_offset, size_t size) {
+S_new_substring(String *string, size_t byte_offset, size_t size) {
     String *self = (String*)VTable_Make_Obj(STRING);
-    self->ptr    = origin->ptr + byte_offset;
-    self->size   = size;
-    self->origin = (String*)INCREF(origin);
+
+    if (string->origin == NULL) {
+        // Copy substring of wrapped strings.
+        Str_init_from_trusted_utf8(self, string->ptr + byte_offset, size);
+    }
+    else {
+        self->ptr    = string->ptr + byte_offset;
+        self->size   = size;
+        self->origin = (String*)INCREF(string->origin);
+    }
+
     return self;
 }
 


[lucy-commits] [2/2] git commit: refs/heads/cfish-string-prep1 - Optimize writing of terms

Posted by nw...@apache.org.
Optimize writing of terms

* Make TextTermStepper#Write_* accept either Strings or CharBufs.
* Convert LexiconWriter#Add_Term to accept an Obj.
* Convert S_write_terms_and_postings in PostingPool to use a CharBuf
  in order to avoid repeated String allocations.


Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/c69fb741
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/c69fb741
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/c69fb741

Branch: refs/heads/cfish-string-prep1
Commit: c69fb741a5d016455b56de8ca3890c33f55ce464
Parents: 1d7ea09
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Sat Sep 21 15:25:32 2013 +0200
Committer: Nick Wellnhofer <we...@aevum.de>
Committed: Sat Sep 21 16:16:03 2013 +0200

----------------------------------------------------------------------
 core/Lucy/Index/LexiconWriter.c   |  5 +++--
 core/Lucy/Index/LexiconWriter.cfh |  2 +-
 core/Lucy/Index/PostingPool.c     | 20 ++++++++++----------
 core/Lucy/Plan/TextType.c         | 34 ++++++++++++++++++++++++----------
 core/Lucy/Plan/TextType.cfh       |  2 +-
 5 files changed, 39 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy/blob/c69fb741/core/Lucy/Index/LexiconWriter.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/LexiconWriter.c b/core/Lucy/Index/LexiconWriter.c
index 5189c49..4253e67 100644
--- a/core/Lucy/Index/LexiconWriter.c
+++ b/core/Lucy/Index/LexiconWriter.c
@@ -18,6 +18,7 @@
 #include "Lucy/Util/ToolSet.h"
 
 #include "Lucy/Index/LexiconWriter.h"
+#include "Clownfish/CharBuf.h"
 #include "Lucy/Plan/FieldType.h"
 #include "Lucy/Plan/Schema.h"
 #include "Lucy/Index/PolyReader.h"
@@ -104,7 +105,7 @@ S_add_last_term_to_ix(LexiconWriter *self) {
 }
 
 void
-LexWriter_Add_Term_IMP(LexiconWriter* self, String* term_text, TermInfo* tinfo) {
+LexWriter_Add_Term_IMP(LexiconWriter* self, Obj* term_text, TermInfo* tinfo) {
     LexiconWriterIVARS *const ivars = LexWriter_IVARS(self);
     OutStream *dat_out = ivars->dat_out;
 
@@ -115,7 +116,7 @@ LexWriter_Add_Term_IMP(LexiconWriter* self, String* term_text, TermInfo* tinfo)
         S_add_last_term_to_ix(self);
     }
 
-    TermStepper_Write_Delta(ivars->term_stepper, dat_out, (Obj*)term_text);
+    TermStepper_Write_Delta(ivars->term_stepper, dat_out, term_text);
     TermStepper_Write_Delta(ivars->tinfo_stepper, dat_out, (Obj*)tinfo);
 
     // Track number of terms.

http://git-wip-us.apache.org/repos/asf/lucy/blob/c69fb741/core/Lucy/Index/LexiconWriter.cfh
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/LexiconWriter.cfh b/core/Lucy/Index/LexiconWriter.cfh
index 340e271..867120c 100644
--- a/core/Lucy/Index/LexiconWriter.cfh
+++ b/core/Lucy/Index/LexiconWriter.cfh
@@ -72,7 +72,7 @@ class Lucy::Index::LexiconWriter cnick LexWriter
      * field number).
      */
     void
-    Add_Term(LexiconWriter* self, String* term_text, TermInfo* tinfo);
+    Add_Term(LexiconWriter* self, Obj* term_text, TermInfo* tinfo);
 
     public void
     Add_Segment(LexiconWriter *self, SegReader *reader,

http://git-wip-us.apache.org/repos/asf/lucy/blob/c69fb741/core/Lucy/Index/PostingPool.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Index/PostingPool.c b/core/Lucy/Index/PostingPool.c
index a969fd5..a3e61c0 100644
--- a/core/Lucy/Index/PostingPool.c
+++ b/core/Lucy/Index/PostingPool.c
@@ -22,6 +22,7 @@
 #include "Lucy/Util/ToolSet.h"
 
 #include "Lucy/Index/PostingPool.h"
+#include "Clownfish/CharBuf.h"
 #include "Lucy/Analysis/Inversion.h"
 #include "Lucy/Plan/Architecture.h"
 #include "Lucy/Plan/FieldType.h"
@@ -377,10 +378,10 @@ S_write_terms_and_postings(PostingPool *self, PostingWriter *post_writer,
                               (*(RawPosting**)PostPool_Fetch(self)),
                               RAWPOSTING);
     RawPostingIVARS *post_ivars = RawPost_IVARS(posting);
-    String *last_term_text
-        = Str_new_from_utf8(post_ivars->blob, post_ivars->content_len);
-    const char *last_text_buf  = Str_Get_Ptr8(last_term_text);
-    uint32_t    last_text_size = Str_Get_Size(last_term_text);
+    CharBuf *last_term_text
+        = CB_new_from_trusted_utf8(post_ivars->blob, post_ivars->content_len);
+    const char *last_text_buf  = CB_Get_Ptr8(last_term_text);
+    uint32_t    last_text_size = CB_Get_Size(last_term_text);
     SkipStepper_Set_ID_And_Filepos(skip_stepper, 0, 0);
 
     // Initialize sentinel to be used on the last iter, using an empty string
@@ -413,7 +414,7 @@ S_write_terms_and_postings(PostingPool *self, PostingWriter *post_writer,
         // If the term text changes, process the last term.
         if (!same_text_as_last) {
             // Hand off to LexiconWriter.
-            LexWriter_Add_Term(lex_writer, last_term_text, tinfo);
+            LexWriter_Add_Term(lex_writer, (Obj*)last_term_text, tinfo);
 
             // Start each term afresh.
             TInfo_Reset(tinfo);
@@ -426,11 +427,10 @@ S_write_terms_and_postings(PostingPool *self, PostingWriter *post_writer,
             last_skip_filepos     = tinfo_ivars->post_filepos;
 
             // Remember the term_text so we can write string diffs.
-            DECREF(last_term_text);
-            last_term_text
-                = Str_new_from_utf8(post_ivars->blob, post_ivars->content_len);
-            last_text_buf  = Str_Get_Ptr8(last_term_text);
-            last_text_size = Str_Get_Size(last_term_text);
+            CB_Mimic_Utf8(last_term_text, post_ivars->blob,
+                          post_ivars->content_len);
+            last_text_buf  = CB_Get_Ptr8(last_term_text);
+            last_text_size = CB_Get_Size(last_term_text);
         }
 
         // Bail on last iter before writing invalid posting data.

http://git-wip-us.apache.org/repos/asf/lucy/blob/c69fb741/core/Lucy/Plan/TextType.c
----------------------------------------------------------------------
diff --git a/core/Lucy/Plan/TextType.c b/core/Lucy/Plan/TextType.c
index 487317c..675b51a 100644
--- a/core/Lucy/Plan/TextType.c
+++ b/core/Lucy/Plan/TextType.c
@@ -93,11 +93,12 @@ void
 TextTermStepper_Write_Key_Frame_IMP(TextTermStepper *self,
                                     OutStream *outstream, Obj *value) {
     TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self);
-    const char *buf  = Str_Get_Ptr8((String*)value);
-    size_t      size = Str_Get_Size((String*)value);
+    CharBuf *charbuf = (CharBuf*)ivars->value;
+    CB_Mimic(charbuf, value);
+    const char *buf  = CB_Get_Ptr8(charbuf);
+    size_t      size = CB_Get_Size(charbuf);
     OutStream_Write_C32(outstream, size);
     OutStream_Write_Bytes(outstream, buf, size);
-    Obj_Mimic(ivars->value, value);
     // Invalidate string.
     DECREF(ivars->string);
     ivars->string = NULL;
@@ -107,12 +108,25 @@ void
 TextTermStepper_Write_Delta_IMP(TextTermStepper *self, OutStream *outstream,
                                 Obj *value) {
     TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self);
-    String     *new_value  = (String*)CERTIFY(value, STRING);
-    CharBuf    *last_value = (CharBuf*)ivars->value;
-    const char *new_text   = Str_Get_Ptr8(new_value);
-    size_t      new_size   = Str_Get_Size(new_value);
-    const char *last_text  = CB_Get_Ptr8(last_value);
-    size_t      last_size  = CB_Get_Size(last_value);
+    CharBuf    *charbuf   = (CharBuf*)ivars->value;
+    const char *last_text = CB_Get_Ptr8(charbuf);
+    size_t      last_size = CB_Get_Size(charbuf);
+    const char *new_text  = NULL;
+    size_t      new_size  = 0;
+
+    if (Obj_Is_A(value, STRING)) {
+        String *new_string = (String*)value;
+        new_text = Str_Get_Ptr8(new_string);
+        new_size = Str_Get_Size(new_string);
+    }
+    else if (Obj_Is_A(value, CHARBUF)) {
+        CharBuf *new_charbuf = (CharBuf*)value;
+        new_text = CB_Get_Ptr8(new_charbuf);
+        new_size = CB_Get_Size(new_charbuf);
+    }
+    else {
+        THROW(ERR, "'value' must be a String or CharBuf");
+    }
 
     // Count how many bytes the strings share at the top.
     const int32_t overlap = StrHelp_overlap(last_text, new_text,
@@ -125,7 +139,7 @@ TextTermStepper_Write_Delta_IMP(TextTermStepper *self, OutStream *outstream,
     OutStream_Write_String(outstream, diff_start_str, diff_len);
 
     // Update value.
-    Obj_Mimic(ivars->value, value);
+    CB_Mimic_Utf8(charbuf, new_text, new_size);
 
     // Invalidate string.
     DECREF(ivars->string);

http://git-wip-us.apache.org/repos/asf/lucy/blob/c69fb741/core/Lucy/Plan/TextType.cfh
----------------------------------------------------------------------
diff --git a/core/Lucy/Plan/TextType.cfh b/core/Lucy/Plan/TextType.cfh
index 33b70ec..b2bf014 100644
--- a/core/Lucy/Plan/TextType.cfh
+++ b/core/Lucy/Plan/TextType.cfh
@@ -42,7 +42,7 @@ class Lucy::Index::TermStepper::TextTermStepper
     Reset(TextTermStepper *self);
 
     /**
-     * @param value A String.
+     * @param value A String or CharBuf.
      */
     public void
     Set_Value(TextTermStepper *self, Obj *value = NULL);