You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by ma...@apache.org on 2009/09/15 03:29:17 UTC

svn commit: r814956 - in /lucene/lucy/trunk: core/Lucy/Object/CharBuf.bp core/Lucy/Object/CharBuf.c core/Lucy/Test/Object/TestCharBuf.bp core/Lucy/Test/Object/TestCharBuf.c perl/lib/Lucy/Object/CharBuf.pm perl/t/core/029-charbuf.t

Author: marvin
Date: Tue Sep 15 01:29:17 2009
New Revision: 814956

URL: http://svn.apache.org/viewvc?rev=814956&view=rev
Log:
Commit LUCY-44, adding CharBuf, ViewCharBuf and ZombieCharBuf.

Added:
    lucene/lucy/trunk/core/Lucy/Object/CharBuf.bp   (with props)
    lucene/lucy/trunk/core/Lucy/Object/CharBuf.c   (with props)
    lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.bp   (with props)
    lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.c   (with props)
    lucene/lucy/trunk/perl/lib/Lucy/Object/CharBuf.pm   (with props)
    lucene/lucy/trunk/perl/t/core/029-charbuf.t   (with props)

Added: lucene/lucy/trunk/core/Lucy/Object/CharBuf.bp
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Object/CharBuf.bp?rev=814956&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Object/CharBuf.bp (added)
+++ lucene/lucy/trunk/core/Lucy/Object/CharBuf.bp Tue Sep 15 01:29:17 2009
@@ -0,0 +1,373 @@
+parcel Lucy;
+
+__C__
+#include <stdarg.h>
+__END_C__
+
+/** 
+ * Growable buffer holding Unicode characters. 
+ */ 
+
+class Lucy::Object::CharBuf cnick CB
+    extends Lucy::Object::Obj {
+
+    char    *ptr;
+    size_t   size;
+    size_t   cap;  /* allocated bytes, including terminating null */
+
+    inert incremented CharBuf*
+    new(size_t size);
+
+    inert CharBuf*
+    init(CharBuf *self, size_t size);
+
+    /** Return a new CharBuf which holds a copy of the passed-in string.
+     * Check for UTF-8 validity.
+     */
+    inert incremented CharBuf* 
+    new_from_utf8(const char *utf8, size_t size);
+
+    /** Return a new CharBuf which holds a copy of the passed-in string.  No
+     * validity checking is performed.
+     */
+    inert incremented CharBuf* 
+    new_from_trusted_utf8(const char *utf8, size_t size);
+
+    /** Return a pointer to a new CharBuf which assumes ownership of the
+     * passed-in string.  Check validity of supplied UTF-8.
+     */
+    inert incremented CharBuf*
+    new_steal_str(char *ptr, size_t size, size_t cap);
+
+    /** Return a pointer to a new CharBuf which assumes ownership of the
+     * passed-in string.  Do not check validity of supplied UTF-8.
+     */
+    inert incremented CharBuf*
+    new_steal_from_trusted_str(char *ptr, size_t size, size_t cap);
+
+    /** Return a pointer to a new CharBuf which contains formatted data 
+     * expanded according to CB_VCatF.  
+     * 
+     * Note: passing a user-supplied string as the format
+     * <code>pattern</code> is a security hole and must be avoided.
+     */
+    inert incremented CharBuf*
+    newf(const char *pattern, ...);
+
+    /** Perform lexical comparison of two CharBufs, with level of indirection
+     * set to please qsort and friends.
+     */
+    inert int
+    compare(const void *va, const void *vb); 
+
+    /** Perform lexical comparison of two CharBufs, with level of indirection
+     * set to please qsort and friends, and return true if <code>a</code> is
+     * less than <code>b</code>.
+     */
+    inert bool_t
+    less_than(const void *va, const void *vb); 
+
+    public void
+    Mimic(CharBuf *self, Obj *other);
+
+    void
+    Mimic_Str(CharBuf *self, const char *ptr, size_t size);
+
+    /** Concatenate the passed-in string onto the end of the CharBuf.
+     */
+    void
+    Cat_Str(CharBuf *self, const char *ptr, size_t size);
+
+    /** Concatenate the contents of <code>other</code> onto the end of the
+     * caller.
+     */
+    void
+    Cat(CharBuf *self, const CharBuf *other);
+
+    /** Concatenate formatted arguments.  Similar to the printf family, but
+     * only accepts minimal options (just enough for decent error messages).
+     *
+     * Objects:  %o
+     * char*:    %s
+     * integers: %i8 %i32 %i64 %u8 %u32 %u64
+     * floats:   %f64
+     * hex:      %x32 
+     * 
+     * Note that all Lucy Objects, including CharBufs, are printed via
+     * %o (which invokes Obj_To_String()). 
+     */
+    void
+    VCatF(CharBuf *self, const char *pattern, va_list args);
+
+    /** Invokes CB_VCatF to concatenate formatted arguments.  Note that this
+     * is only a function and not a method.
+     */
+    inert void
+    catf(CharBuf *self, const char *pattern, ...);
+
+    /** Replaces the contents of the CharBuf using formatted arguments.
+     */
+    inert void
+    setf(CharBuf *self, const char *pattern, ...);
+
+    /** Concatenate one Unicode character onto the end of the CharBuf.
+     */
+    void
+    Cat_Char(CharBuf *self, u32_t code_point);
+
+    /** Replace all instances of one character for the other.  For now, both
+     * the source and replacement code points must be ASCII.
+     */
+    i32_t
+    Swap_Chars(CharBuf *self, u32_t match, u32_t replacement);
+
+    public i64_t
+    To_I64(CharBuf *self);
+
+    /** Extract a 64-bit integer from a variable-base stringified version.
+     */
+    i64_t
+    BaseX_To_I64(CharBuf *self, u32_t base);
+
+    public double
+    To_F64(CharBuf *self);
+
+    /** Assign more memory to the CharBuf, if it doesn't already have enough
+     * room to hold a string of <code>size</code> bytes.  Cannot shrink the
+     * allocation.
+     * 
+     * @return a pointer to the raw buffer.
+     */
+    char*
+    Grow(CharBuf *self, size_t size);
+
+    /** Test whether the CharBuf starts with the content of another.
+     */
+    bool_t
+    Starts_With(CharBuf *self, const CharBuf *prefix);
+
+    /** Test whether the CharBuf starts with the passed-in string.
+     */
+    bool_t
+    Starts_With_Str(CharBuf *self, const char *prefix, size_t size);
+
+    /** Test whether the CharBuf ends with the content of another.
+     */
+    bool_t
+    Ends_With(CharBuf *self, const CharBuf *postfix);
+
+    /** Test whether the CharBuf ends with the passed-in string.
+     */
+    bool_t
+    Ends_With_Str(CharBuf *self, const char *postfix, size_t size);
+
+    /** Test whether the CharBuf matches the passed-in string.
+     */
+    bool_t
+    Equals_Str(CharBuf *self, const char *ptr, size_t size);
+
+    /** Return the number of Unicode code points in the object's string. 
+     */
+    size_t
+    Length(CharBuf *self);
+
+    /** Set the CharBuf's <code>size</code> attribute.
+     */
+    void
+    Set_Size(CharBuf *self, size_t size);
+
+    /** Get the CharBuf's <code>size</code> attribute.
+     */
+    size_t
+    Get_Size(CharBuf *self);
+
+    /** Return the internal backing array for the CharBuf if its internal
+     * encoding is UTF-8.  If it is not encoded as UTF-8 throw an exception.
+     */
+    u8_t*
+    Get_Ptr8(CharBuf *self);
+
+    /** Return a fresh copy of the string data in a CharBuf with an internal
+     * encoding of UTF-8.
+     */
+    CharBuf*
+    To_CB8(CharBuf *self);
+    
+    public incremented CharBuf*
+    Clone(CharBuf *self);
+
+    public void
+    Destroy(CharBuf *self);
+
+    public bool_t
+    Equals(CharBuf *self, Obj *other);
+
+    public i32_t
+    Compare_To(CharBuf *self, Obj *other);
+
+    public i32_t
+    Hash_Code(CharBuf *self);
+
+    public incremented CharBuf*
+    To_String(CharBuf *self);
+
+    public incremented CharBuf*
+    Load(CharBuf *self, Obj *dump);
+
+    /** Remove Unicode whitespace characters from both top and tail.
+     */
+    u32_t
+    Trim(CharBuf *self);
+
+    /** Remove leading Unicode whitespace.
+     */
+    u32_t
+    Trim_Top(CharBuf *self);
+
+    /** Remove trailing Unicode whitespace.
+     */
+    u32_t
+    Trim_Tail(CharBuf *self);
+
+    /** Remove characters (measured in code points) from the top of the
+     * CharBuf.  Returns the number nipped.
+     */
+    size_t
+    Nip(CharBuf *self, size_t count);
+
+    /** Remove one character from the top of the CharBuf.  Returns the code
+     * point, or 0 if the string was empty.
+     */
+    i32_t
+    Nip_One(CharBuf *self);
+
+    /** Remove characters (measured in code points) from the end of the
+     * CharBuf.  Returns the number chopped.
+     */
+    size_t
+    Chop(CharBuf *self, size_t count);
+
+    /** Truncate the CharBuf so that it contains no more than
+     * <code>count</code> characters.
+     *
+     * @param count Maximum new length, in Unicode code points.
+     * @return The number of code points left in the string after truncation.
+     */
+    size_t
+    Truncate(CharBuf *self, size_t count);
+
+    /** Return the Unicode code point at the specified number of code points
+     * in.  Return 0 if the string length is exceeded.  (XXX It would be
+     * better to throw an exception, but that's not practical with UTF-8 and
+     * no cached length.)
+     */
+    u32_t
+    Code_Point_At(CharBuf *self, size_t tick);
+
+    /** Return the Unicode code point at the specified number of code points
+     * counted backwards from the end of the string.  Return 0 if outside the
+     * string.
+     */
+    u32_t
+    Code_Point_From(CharBuf *self, size_t tick);
+
+    /** Return a newly allocated CharBuf containing a copy of the indicated
+     * substring.
+     * @param offset Offset from the top, in code points.
+     * @param len The desired length of the substring, in code points.
+     */
+    incremented CharBuf*
+    SubString(CharBuf *self, size_t offset, size_t len);
+
+    /** Concatenate the supplied text onto the end of the CharBuf.  Don't
+     * check for UTF-8 validity.
+     */
+    void 
+    Cat_Trusted_Str(CharBuf *self, const char *ptr, size_t size);
+}
+
+class Lucy::Object::ViewCharBuf cnick ViewCB
+    extends Lucy::Object::CharBuf {
+    
+    inert incremented ViewCharBuf*
+    new_from_utf8(const char *utf8, size_t size);
+
+    inert incremented ViewCharBuf*
+    new_from_trusted_utf8(const char *utf8, size_t size);
+
+    inert ViewCharBuf*
+    init(ViewCharBuf *self, const char *utf8, size_t size);
+
+    void
+    Assign(ViewCharBuf *self, const CharBuf *other);
+
+    void
+    Assign_Str(ViewCharBuf *self, const char *utf8, size_t size);
+
+    u32_t
+    Trim_Top(ViewCharBuf *self);
+
+    size_t
+    Nip(ViewCharBuf *self, size_t count);
+
+    i32_t
+    Nip_One(ViewCharBuf *self);
+
+    /** Throws an error. */
+    void
+    Grow(ViewCharBuf *self, size_t size);
+
+    public void
+    Destroy(ViewCharBuf *self);
+}
+
+class Lucy::Object::ZombieCharBuf cnick ZCB
+    extends Lucy::Object::ViewCharBuf {
+
+    /** Throws an error.
+     */
+    public void
+    Destroy(ZombieCharBuf *self);
+}
+
+__C__
+
+#define LUCY_ZCB_BLANK \
+    { LUCY_ZOMBIECHARBUF, {1}, "", 0, 0 }
+#define LUCY_ZCB_LITERAL(string) \
+    { LUCY_ZOMBIECHARBUF, {1}, string "", sizeof(string) -1, sizeof(string) }
+
+extern lucy_ZombieCharBuf LUCY_ZCB_EMPTY;
+
+/* Simple functions which allow the assignment idiom for ZombieCharBuf
+ * values.
+ */
+lucy_ZombieCharBuf
+lucy_ZCB_make_str(const char *ptr, size_t size);
+
+lucy_ZombieCharBuf
+lucy_ZCB_make(const lucy_CharBuf *source);
+
+#ifdef LUCY_USE_SHORT_NAMES
+  #define ZCB_BLANK             LUCY_ZCB_BLANK
+  #define ZCB_LITERAL(_string)  LUCY_ZCB_LITERAL(_string)
+  #define EMPTY                 LUCY_ZCB_EMPTY
+  #define ZCB_make              lucy_ZCB_make
+  #define ZCB_make_str          lucy_ZCB_make_str
+#endif
+__END_C__
+
+/* Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+

Propchange: lucene/lucy/trunk/core/Lucy/Object/CharBuf.bp
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/lucy/trunk/core/Lucy/Object/CharBuf.c
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Object/CharBuf.c?rev=814956&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Object/CharBuf.c (added)
+++ lucene/lucy/trunk/core/Lucy/Object/CharBuf.c Tue Sep 15 01:29:17 2009
@@ -0,0 +1,906 @@
+#define C_LUCY_CHARBUF
+#define C_LUCY_VIEWCHARBUF
+#define C_LUCY_ZOMBIECHARBUF
+#define LUCY_USE_SHORT_NAMES
+#define CHY_USE_SHORT_NAMES
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "Lucy/Object/VTable.h"
+#include "Lucy/Object/CharBuf.h"
+
+#include "Lucy/Object/Err.h"
+#include "Lucy/Util/Memory.h"
+#include "Lucy/Util/StringHelper.h"
+
+/* The end of the string (address of terminating NULL). */
+#define CBEND(self) ((self)->ptr + (self)->size)
+
+/* Reallocate if necessary. */
+static INLINE void
+SI_maybe_grow(CharBuf *self, size_t new_size);
+
+/* Maximum number of characters in a stringified 64-bit integer, including
+ * minus sign if negative.
+ */
+#define MAX_I64_CHARS 20
+
+/* Helper function for throwing invalid UTF-8 error. Since THROW uses
+ * a CharBuf internally, calling THROW with invalid UTF-8 would create an
+ * infinite loop -- so we fwrite some of the bogus text to stderr and then
+ * invoke THROW with a generic message. */
+static void
+S_die_invalid_utf8(const char *text, size_t size);
+
+/* Helper function for throwing invalid pattern error. */
+static void
+S_die_invalid_pattern(const char *pattern);
+
+ZombieCharBuf EMPTY = ZCB_BLANK;
+
+/* Constructor: make a new CharBuf object and initialize it via CB_init()
+ * with room for `size` bytes of string data.
+ */
+CharBuf*
+CB_new(size_t size) 
+{
+    return CB_init((CharBuf*)VTable_Make_Obj(CHARBUF), size);
+}
+
+/* Initialize `self` as an empty string.  The buffer is allocated with one
+ * byte more than `size` so there is always room for the terminating NUL.
+ * Returns `self`.
+ */
+CharBuf*
+CB_init(CharBuf *self, size_t size) 
+{
+    const size_t capacity = size + 1;
+
+    self->ptr    = (char*)MALLOCATE(capacity);
+    self->ptr[0] = '\0';
+    self->size   = 0;
+    self->cap    = capacity;
+
+    return self;
+}
+
+/* Constructor: copy `size` bytes from `ptr` into a new CharBuf, after
+ * verifying that they are valid UTF-8.  Invalid input is reported via
+ * S_die_invalid_utf8().
+ */
+CharBuf*
+CB_new_from_utf8(const char *ptr, size_t size) 
+{
+    const bool_t is_valid = StrHelp_utf8_valid(ptr, size);
+    if (!is_valid) {
+        S_die_invalid_utf8(ptr, size);
+    }
+    return CB_new_from_trusted_utf8(ptr, size);
+}
+
+/* Constructor: copy `size` bytes from `ptr` into a new CharBuf without any
+ * validity check.  The copy is NUL-terminated, so the allocation is
+ * `size + 1` bytes.
+ */
+CharBuf*
+CB_new_from_trusted_utf8(const char *ptr, size_t size) 
+{
+    CharBuf *self     = (CharBuf*)VTable_Make_Obj(CHARBUF);
+    char    *contents = (char*)MALLOCATE(size + 1);
+
+    memcpy(contents, ptr, size);
+    contents[size] = '\0';
+
+    self->ptr  = contents;
+    self->size = size;
+    self->cap  = size + 1;
+
+    return self;
+}
+
+/* Constructor: wrap an existing buffer instead of copying it.  The new
+ * CharBuf stores `ptr`, `size` and `cap` exactly as supplied; no validity
+ * check is performed.
+ */
+CharBuf*
+CB_new_steal_from_trusted_str(char *ptr, size_t size, size_t cap) 
+{
+    CharBuf *thief = (CharBuf*)VTable_Make_Obj(CHARBUF);
+
+    thief->cap  = cap;
+    thief->size = size;
+    thief->ptr  = ptr;
+
+    return thief;
+}
+
+/* Constructor: wrap an existing buffer instead of copying it, after
+ * verifying that its first `size` bytes are valid UTF-8.
+ *
+ * The validity check's result must be acted upon: previously it was
+ * computed and then discarded, so invalid input was accepted silently.
+ * Invalid input is now reported via S_die_invalid_utf8(), matching
+ * CB_new_from_utf8().
+ */
+CharBuf*
+CB_new_steal_str(char *ptr, size_t size, size_t cap) 
+{
+    if (!StrHelp_utf8_valid(ptr, size)) {
+        S_die_invalid_utf8(ptr, size);
+    }
+    return CB_new_steal_from_trusted_str(ptr, size, cap);
+}
+
+/* Constructor: build a new CharBuf from a CB_VCatF-style pattern and its
+ * arguments.  The pattern's length is used as the initial capacity.
+ */
+CharBuf*
+CB_newf(const char *pattern, ...) {
+    const size_t  initial_size = strlen(pattern);
+    CharBuf      *result       = CB_new(initial_size);
+    va_list       ap;
+
+    va_start(ap, pattern);
+    CB_VCatF(result, pattern, ap);
+    va_end(ap);
+
+    return result;
+}
+
+/* Destructor: free the string buffer, then invoke SUPER_DESTROY for
+ * CHARBUF.
+ */
+void
+CB_destroy(CharBuf *self)
+{
+    char *const buffer = self->ptr;
+    FREEMEM(buffer);
+    SUPER_DESTROY(self, CHARBUF);
+}
+
+/* Compute a hash over the string's code points: starting from 5381, each
+ * step multiplies the running value by 33 and XORs in the next code point.
+ */
+i32_t
+CB_hash_code(CharBuf *self)
+{
+    ZombieCharBuf      remaining = ZCB_make(self);
+    /* Resolve the Nip_One method once, outside the loop. */
+    const CB_nip_one_t nip_one 
+        = (CB_nip_one_t)METHOD(remaining.vtable, CB, Nip_One);
+    u32_t              sum       = 5381;
+
+    while (remaining.size != 0) {
+        const u32_t code_point = (u32_t)nip_one((CharBuf*)&remaining);
+        sum = (sum * 33) ^ code_point;
+    }
+
+    return (i32_t)sum;
+}
+
+/* Make sure the buffer can hold `new_size` bytes plus a terminating NUL,
+ * reallocating only when the current capacity is too small.
+ */
+static INLINE void
+SI_maybe_grow(CharBuf *self, size_t new_size) 
+{
+    if (new_size >= self->cap) {
+        const size_t needed = new_size + 1;
+        self->ptr = (char*)REALLOCATE(self->ptr, needed);
+        self->cap = needed;
+    }
+}
+
+/* Ensure room for a string of `new_size` bytes and return the (possibly
+ * relocated) raw buffer.
+ */
+char*
+CB_grow(CharBuf *self, size_t new_size) 
+{
+    char *buffer;
+    SI_maybe_grow(self, new_size);
+    buffer = self->ptr;
+    return buffer;
+}
+
+/* Echo up to the first 200 bytes of the offending text to stderr (followed
+ * by "[...]" when it was longer), then THROW a generic error message.
+ */
+static void
+S_die_invalid_utf8(const char *text, size_t size)
+{
+    const size_t shown = size < 200 ? size : 200;
+
+    fputs("Invalid UTF-8, aborting: '", stderr);
+    fwrite(text, sizeof(char), shown, stderr);
+    if (size > 200) {
+        fputs("[...]", stderr);
+    }
+    fputs("'\n", stderr);
+
+    THROW(ERR, "Invalid UTF-8.");
+}
+
+static void
+S_die_invalid_pattern(const char *pattern)
+{
+    size_t  pattern_len = strlen(pattern);
+    fprintf(stderr, "Invalid pattern, aborting: '");
+    fwrite(pattern, sizeof(char), pattern_len, stderr);
+    fprintf(stderr, "'\n");
+    THROW(ERR, "Invalid pattern.");
+}
+
+/* Replace the CharBuf's contents: reset its size to 0, then append the
+ * formatted arguments via CB_VCatF.
+ */
+void
+CB_setf(CharBuf *self, const char *pattern, ...)
+{
+    va_list ap;
+
+    CB_Set_Size(self, 0);
+
+    va_start(ap, pattern);
+    CB_VCatF(self, pattern, ap);
+    va_end(ap);
+}
+
+/* Variadic front end for CB_VCatF: append the formatted arguments to the
+ * CharBuf.
+ */
+void
+CB_catf(CharBuf *self, const char *pattern, ...)
+{
+    va_list ap;
+
+    va_start(ap, pattern);
+    CB_VCatF(self, pattern, ap);
+    va_end(ap);
+}
+
+/* Append formatted text to the CharBuf.
+ *
+ * Walks `pattern`, copying literal runs verbatim and expanding each
+ * '%'-introduced directive with the next argument pulled from `args`:
+ *
+ *   %%               a literal '%'
+ *   %o               Obj* -- CharBufs are appended directly, other objects
+ *                    via Obj_To_String(); NULL prints "[NULL]"
+ *   %s               char* -- NULL prints "[NULL]", invalid UTF-8 prints
+ *                    "[INVALID UTF8]"
+ *   %i8 %i32 %i64    signed integers
+ *   %u8 %u32 %u64    unsigned integers
+ *   %f64             double, rendered with "%g"
+ *   %x32             u32_t, rendered as 8 zero-padded hex digits
+ *
+ * Any other directive is reported through S_die_invalid_pattern().
+ */
+void
+CB_vcatf(CharBuf *self, const char *pattern, va_list args)
+{
+    size_t      pattern_len   = strlen(pattern);
+    const char *pattern_start = pattern;
+    const char *pattern_end   = pattern + pattern_len;
+    char        buf[64]; /* Scratch space for stringified numbers. */
+
+    for ( ; pattern < pattern_end; pattern++) {
+        const char *slice_end = pattern;
+
+        /* Consume all characters leading up to a '%'. */
+        while (slice_end < pattern_end && *slice_end != '%') { slice_end++; }
+        if (pattern != slice_end) {
+            size_t size = slice_end - pattern;
+            CB_Cat_Trusted_Str(self, pattern, size);
+            pattern = slice_end;
+        }
+
+        if (pattern < pattern_end) {
+            pattern++; /* Move past '%'. */
+
+            /* Each case leaves `pattern` on the last character of its
+             * directive; the loop increment then steps past it. */
+            switch (*pattern) {
+                case '%': {
+                    CB_Cat_Trusted_Str(self, "%", 1);
+                } 
+                break;
+
+                case 'o': {
+                    Obj *obj = va_arg(args, Obj*);
+                    if (!obj) {
+                        CB_Cat_Trusted_Str(self, "[NULL]", 6);
+                    }
+                    else if (Obj_Is_A(obj, CHARBUF)) {
+                        CB_Cat(self, (CharBuf*)obj);
+                    }
+                    else {
+                        CharBuf *string = Obj_To_String(obj);
+                        CB_Cat(self, string);
+                        DECREF(string);
+                    }
+                } 
+                break;
+
+                case 'i': {
+                    /* Keep the value signed: it is handed to a signed
+                     * conversion, so a negative argument must not be
+                     * routed through an unsigned variable. */
+                    i64_t val = 0;
+                    size_t size;
+
+                    if (pattern[1] == '8') {
+                        /* 8-bit values are fetched as i32_t. */
+                        val = va_arg(args, i32_t);
+                        pattern++;
+                    }
+                    else if (pattern[1] == '3' && pattern[2] == '2') {
+                        val = va_arg(args, i32_t);
+                        pattern += 2;
+                    }
+                    else if (pattern[1] == '6' && pattern[2] == '4') {
+                        val = va_arg(args, i64_t);
+                        pattern += 2;
+                    }
+                    else {
+                        S_die_invalid_pattern(pattern_start);
+                    }
+                    size = sprintf(buf, "%" I64P, val);
+                    CB_Cat_Trusted_Str(self, buf, size);
+                } 
+                break;
+
+                case 'u': {
+                    u64_t val = 0;
+                    size_t size;
+
+                    if (pattern[1] == '8') {
+                        /* 8-bit values are fetched as u32_t. */
+                        val = va_arg(args, u32_t);
+                        pattern += 1;
+                    }
+                    else if (pattern[1] == '3' && pattern[2] == '2') {
+                        val = va_arg(args, u32_t);
+                        pattern += 2;
+                    }
+                    else if (pattern[1] == '6' && pattern[2] == '4') {
+                        val = va_arg(args, u64_t);
+                        pattern += 2;
+                    }
+                    else {
+                        S_die_invalid_pattern(pattern_start);
+                    }
+                    size = sprintf(buf, "%" U64P, val);
+                    CB_Cat_Trusted_Str(self, buf, size);
+                } 
+                break;
+
+                case 'f': {
+                    if (pattern[1] == '6' && pattern[2] == '4') {
+                        double num  = va_arg(args, double);
+                        size_t size = sprintf(buf, "%g", num);
+                        CB_Cat_Trusted_Str(self, buf, size);
+                        pattern += 2;
+                    }
+                    else {
+                        S_die_invalid_pattern(pattern_start);
+                    }
+                } 
+                break;
+
+                case 'x': {
+                    if (pattern[1] == '3' && pattern[2] == '2') {
+                        unsigned long val = va_arg(args, u32_t);
+                        size_t size = sprintf(buf, "%.8lx", val);
+                        CB_Cat_Trusted_Str(self, buf, size);
+                        pattern += 2;
+                    }
+                    else {
+                        S_die_invalid_pattern(pattern_start);
+                    }
+                } 
+                break;
+
+                case 's': {
+                    char *string = va_arg(args, char*);
+                    if (string == NULL) {
+                        CB_Cat_Trusted_Str(self, "[NULL]", 6);
+                    }
+                    else {
+                        size_t size = strlen(string);
+                        if (StrHelp_utf8_valid(string, size)) {
+                            CB_Cat_Trusted_Str(self, string, size);
+                        }
+                        else {
+                            CB_Cat_Trusted_Str(self, "[INVALID UTF8]", 14);
+                        }
+                    }
+                } 
+                break;
+
+                /* Assume NUL-terminated pattern string, which eliminates the
+                 * need for bounds checking if '%' is the last visible
+                 * character: the switch then sees the terminator and lands
+                 * here. */
+                default: {
+                    S_die_invalid_pattern(pattern_start);
+                }
+            }
+        }
+    }
+}
+
+/* Return a new CharBuf holding a copy of this one's bytes. */
+CharBuf*
+CB_to_string(CharBuf *self)
+{
+    CharBuf *copy = CB_new_from_trusted_utf8(self->ptr, self->size);
+    return copy;
+}
+
+/* Append a single code point, encoded as UTF-8, and re-terminate the
+ * string.  Room for 4 bytes plus the NUL is guaranteed before encoding.
+ */
+void
+CB_cat_char(CharBuf *self, u32_t code_point)
+{
+    const size_t MIN_SAFE_ROOM = 4 + 1;
+    u8_t   *dest;
+    size_t  num_bytes;
+
+    if (self->size + MIN_SAFE_ROOM > self->cap) {
+        SI_maybe_grow(self, self->size + 10);
+    }
+
+    /* Encode directly at the current end of the string. */
+    dest       = (u8_t*)CBEND(self);
+    num_bytes  = StrHelp_encode_utf8_char(code_point, dest);
+    self->size += num_bytes;
+    self->ptr[self->size] = '\0';
+}
+
+/* Replace every occurrence of the byte `match` with `replacement` and
+ * return the number of replacements.  Both arguments must be ASCII (at
+ * most 127); otherwise an error is thrown.
+ */
+i32_t
+CB_swap_chars(CharBuf *self, u32_t match, u32_t replacement)
+{
+    i32_t num_swapped = 0;
+
+    if (match > 127) { 
+        THROW(ERR, "match point too high: %u32", match);
+    }
+    else if (replacement > 127) { 
+        THROW(ERR, "replacement code point too high: %u32", replacement);
+    }
+    else {
+        const char from = (char)match;
+        const char to   = (char)replacement;
+        size_t     i;
+        for (i = 0; i < self->size; i++) {
+            if (self->ptr[i] == from) {
+                self->ptr[i] = to;
+                num_swapped++;
+            }
+        }
+    }
+
+    return num_swapped;
+}
+
+/* Parse the string as a base-10 integer via BaseX_To_I64. */
+i64_t
+CB_to_i64(CharBuf *self) 
+{
+    const u32_t decimal = 10;
+    return CB_BaseX_To_I64(self, decimal);
+}
+
+/* Extract a 64-bit integer from a stringified number in the given base.
+ *
+ * An optional leading '-' negates the result.  Digits are '0'-'9' followed
+ * by the letters 'a'-'z' (case-insensitive) for values 10 and up.  Parsing
+ * stops at the first character that is not a valid digit for `base`, and
+ * whatever has been accumulated up to that point is returned.
+ */
+i64_t
+CB_basex_to_i64(CharBuf *self, u32_t base)
+{
+    ZombieCharBuf iterator = ZCB_make(self);
+    i64_t retval = 0;
+    bool_t is_negative = false;
+
+    /* Advance past minus sign. */
+    if (ZCB_Code_Point_At(&iterator, 0) == '-') { 
+        ZCB_Nip_One(&iterator);
+        is_negative = true;
+    }
+
+    /* Accumulate. */
+    while (iterator.size) {
+        i32_t code_point = ZCB_Nip_One(&iterator);
+        i32_t addend;
+
+        /* Only ASCII can be a digit.  Bail out before handing a wider code
+         * point to the <ctype.h> functions, whose argument must fit in an
+         * unsigned char. */
+        if (code_point < 0 || code_point > 127) break;
+
+        if (isdigit(code_point)) {
+            addend = code_point - '0';
+        }
+        else if (isalpha(code_point)) {
+            addend = tolower(code_point) - 'a' + 10;
+        }
+        else {
+            break;
+        }
+
+        /* Valid digit values run from 0 to base - 1, so a value equal to
+         * the base must be rejected as well. */
+        if (addend >= (i32_t)base) break;
+
+        retval *= base;
+        retval += addend;
+    }
+
+    /* Apply minus sign. */
+    if (is_negative) retval = 0 - retval;
+
+    return retval;
+}
+
+/* Parse the string as a floating point number using strtod().
+ *
+ * strtod() needs a NUL-terminated string, but nothing guarantees that the
+ * byte at ptr[size] is a NUL: ViewCB_init() points `ptr` at caller-supplied
+ * memory, and CB_set_size(), CB_nip() and CB_trim_top() change `size`
+ * without re-terminating.  Parse a terminated private copy so that strtod()
+ * never reads beyond the string's `size` bytes.
+ */
+double
+CB_to_f64(CharBuf *self)
+{
+    const size_t  len  = self->size;
+    char         *copy = (char*)MALLOCATE(len + 1);
+    double        value;
+
+    memcpy(copy, self->ptr, len);
+    copy[len] = '\0';
+    value = strtod(copy, NULL);
+    FREEMEM(copy);
+
+    return value;
+}
+
+/* Return a fresh CharBuf containing a copy of this one's bytes. */
+CharBuf*
+CB_to_cb8(CharBuf *self)
+{
+    CharBuf *copy = CB_new_from_trusted_utf8(self->ptr, self->size);
+    return copy;
+}
+
+/* Return a new CharBuf with the same contents as `self`. */
+CharBuf*
+CB_clone(CharBuf *self) 
+{
+    CharBuf *twin = CB_new_from_trusted_utf8(self->ptr, self->size);
+    return twin;
+}
+
+/* Load from a dump: the dump must be a CharBuf (checked with ASSERT_IS_A)
+ * and the result is a clone of it.  `self` is not consulted.
+ */
+CharBuf*
+CB_load(CharBuf *self, Obj *dump)
+{
+    CharBuf *dumped = (CharBuf*)ASSERT_IS_A(dump, CHARBUF);
+    CharBuf *loaded;
+    UNUSED_VAR(self);
+    loaded = CB_Clone(dumped);
+    return loaded;
+}
+
+/* Overwrite the CharBuf's contents with a copy of the supplied bytes, which
+ * must be valid UTF-8.  memmove() is used, so the source may overlap the
+ * existing buffer.
+ */
+void
+CB_mimic_str(CharBuf *self, const char* ptr, size_t size) 
+{
+    const bool_t is_valid = StrHelp_utf8_valid(ptr, size);
+    if (!is_valid) {
+        S_die_invalid_utf8(ptr, size);
+    }
+
+    SI_maybe_grow(self, size);
+    memmove(self->ptr, ptr, size);
+    self->ptr[size] = '\0';
+    self->size      = size;
+}
+
+/* Overwrite the CharBuf's contents with a copy of another CharBuf's.
+ * `other` must be a CharBuf (checked with ASSERT_IS_A).
+ */
+void
+CB_mimic(CharBuf *self, Obj *other)
+{
+    CharBuf      *source   = (CharBuf*)ASSERT_IS_A(other, CHARBUF);
+    const size_t  new_size = source->size;
+
+    SI_maybe_grow(self, new_size);
+    /* Read source->ptr only after growing, as in the original ordering. */
+    memmove(self->ptr, source->ptr, new_size);
+    self->ptr[new_size] = '\0';
+    self->size          = new_size;
+}
+
+/* Append the supplied bytes after verifying that they are valid UTF-8. */
+void 
+CB_cat_str(CharBuf *self, const char* ptr, size_t size) 
+{
+    const bool_t is_valid = StrHelp_utf8_valid(ptr, size);
+    if (!is_valid) {
+        S_die_invalid_utf8(ptr, size);
+    }
+    CB_cat_trusted_str(self, ptr, size);
+}
+
+/* Append `size` bytes from `ptr` without a validity check, growing the
+ * buffer if needed and re-terminating the string.
+ */
+void 
+CB_cat_trusted_str(CharBuf *self, const char* ptr, size_t size) 
+{
+    const size_t old_size = self->size;
+    const size_t combined = old_size + size;
+
+    SI_maybe_grow(self, combined);
+    memcpy(self->ptr + old_size, ptr, size);
+    self->ptr[combined] = '\0';
+    self->size          = combined;
+}
+
+/* Append the contents of `other`, growing the buffer if needed and
+ * re-terminating the string.
+ */
+void
+CB_cat(CharBuf *self, const CharBuf *other) 
+{
+    const size_t old_size = self->size;
+    const size_t extra    = other->size;
+    const size_t combined = old_size + extra;
+
+    SI_maybe_grow(self, combined);
+    /* Read other->ptr only after growing, as in the original ordering. */
+    memcpy(self->ptr + old_size, other->ptr, extra);
+    self->ptr[combined] = '\0';
+    self->size          = combined;
+}
+
+/* Test whether the string begins with the contents of `prefix`. */
+bool_t
+CB_starts_with(CharBuf *self, const CharBuf *prefix)
+{
+    const char   *prefix_ptr  = prefix->ptr;
+    const size_t  prefix_size = prefix->size;
+    return CB_starts_with_str(self, prefix_ptr, prefix_size);
+}
+
+/* Test whether the string begins with the `size` bytes at `prefix`.  A
+ * prefix longer than the string never matches.
+ */
+bool_t
+CB_starts_with_str(CharBuf *self, const char *prefix, size_t size)
+{
+    if (size > self->size) {
+        return false;
+    }
+    if (memcmp(self->ptr, prefix, size) != 0) {
+        return false;
+    }
+    return true;
+}
+
+/* Equality test: true when `other` is this very object, or is a CharBuf
+ * holding the same bytes.
+ */
+bool_t
+CB_equals(CharBuf *self, Obj *other)
+{
+    CharBuf *const candidate = (CharBuf*)other;
+
+    if (candidate == self) {
+        return true;
+    }
+    else if (!Obj_Is_A(candidate, CHARBUF)) {
+        return false;
+    }
+    else {
+        return CB_equals_str(self, candidate->ptr, candidate->size);
+    }
+}
+
+/* Compare against `other` by delegating to CB_compare(), which expects
+ * pointers to the object pointers.
+ */
+i32_t
+CB_compare_to(CharBuf *self, Obj *other)
+{
+    i32_t comparison = CB_compare(&self, &other);
+    return comparison;
+}
+
+/* Test whether the string consists of exactly the `size` bytes at `ptr`. */
+bool_t
+CB_equals_str(CharBuf *self, const char *ptr, size_t size)
+{
+    if (self->size == size) {
+        return (memcmp(self->ptr, ptr, size) == 0);
+    }
+    return false;
+}
+
+/* Test whether the string ends with the contents of `postfix`. */
+bool_t
+CB_ends_with(CharBuf *self, const CharBuf *postfix)
+{
+    const char   *postfix_ptr  = postfix->ptr;
+    const size_t  postfix_size = postfix->size;
+    return CB_ends_with_str(self, postfix_ptr, postfix_size);
+}
+
+/* Test whether the string ends with the `postfix_len` bytes at `postfix`.
+ * A postfix longer than the string never matches.
+ */
+bool_t
+CB_ends_with_str(CharBuf *self, const char *postfix, size_t postfix_len)
+{
+    const char *tail;
+
+    if (postfix_len > self->size) {
+        return false;
+    }
+
+    /* Compare against the last postfix_len bytes of the string. */
+    tail = CBEND(self) - postfix_len;
+    if (memcmp(tail, postfix, postfix_len) == 0) {
+        return true;
+    }
+    return false;
+}
+
+/* Strip whitespace from both ends and return the total number of code
+ * points removed.
+ */
+u32_t
+CB_trim(CharBuf *self)
+{
+    const u32_t from_top  = CB_Trim_Top(self);
+    const u32_t from_tail = CB_Trim_Tail(self);
+    return from_top + from_tail;
+}
+
+/* Strip leading whitespace by shifting the rest of the string to the front
+ * of the buffer.  Returns the number of code points removed.
+ */
+u32_t
+CB_trim_top(CharBuf *self)
+{
+    char *const  end     = CBEND(self);
+    char        *cursor  = self->ptr;
+    u32_t        trimmed = 0;
+
+    /* Step over whitespace, one UTF-8 character at a time. */
+    for ( ; cursor < end; trimmed++) {
+        if (!StrHelp_is_whitespace(StrHelp_decode_utf8_char(cursor))) {
+            break;
+        }
+        cursor += StrHelp_UTF8_SKIP[*(u8_t*)cursor];
+    }
+
+    if (trimmed != 0) {
+        /* Slide the remainder down to the start of the buffer. */
+        self->size = end - cursor;
+        memmove(self->ptr, cursor, self->size);
+    }
+
+    return trimmed;
+}
+
+/* Strip trailing whitespace by shrinking `size`; the buffer's bytes are
+ * left untouched.  Returns the number of code points removed.
+ */
+u32_t
+CB_trim_tail(CharBuf *self)
+{
+    char *const  top     = self->ptr; 
+    const char  *cursor  = CBEND(self);
+    u32_t        trimmed = 0;
+
+    for (;;) {
+        /* Step back one character; NULL means the top has been reached. */
+        cursor = StrHelp_back_utf8_char(cursor, top);
+        if (cursor == NULL) {
+            break;
+        }
+        if (!StrHelp_is_whitespace(StrHelp_decode_utf8_char(cursor))) {
+            break;
+        }
+        /* The string now ends where this whitespace character begins. */
+        self->size = cursor - top;
+        trimmed++;
+    }
+
+    return trimmed;
+}
+
+/* Remove up to `count` code points from the top of the string, shifting
+ * the remainder to the front of the buffer.  Returns the number of code
+ * points actually removed.
+ *
+ * If the final character's lead byte announces more bytes than the string
+ * holds, the scan would step past the end and `end - ptr` would wrap to a
+ * huge size_t.  That case is reported as invalid UTF-8 instead.
+ */
+size_t
+CB_nip(CharBuf *self, size_t count)
+{
+    size_t       num_nipped = 0;
+    char *const  end        = CBEND(self);
+    char        *ptr        = self->ptr;
+    for ( ; ptr < end  && count--; ptr += StrHelp_UTF8_SKIP[*(u8_t*)ptr]) {
+        num_nipped++;
+    }
+    if (ptr > end) {
+        /* Truncated trailing sequence: refuse to compute a bogus size. */
+        S_die_invalid_utf8(self->ptr, self->size);
+    }
+    self->size = end - ptr;
+    memmove(self->ptr, ptr, self->size);
+    return num_nipped;
+}
+
+/* Remove the first code point from the string, shifting the remainder to
+ * the front of the buffer.  Returns the code point, or 0 if the string was
+ * empty.
+ *
+ * If the lead byte announces more bytes than the string holds, subtracting
+ * them from `size` would wrap around; that case is reported as invalid
+ * UTF-8 before anything is decoded or moved.
+ */
+i32_t
+CB_nip_one(CharBuf *self)
+{
+    if (self->size == 0) {
+        return 0;
+    }
+    else {
+        const size_t consumed = StrHelp_UTF8_SKIP[*(u8_t*)self->ptr];
+        i32_t retval;
+
+        if (consumed > self->size) {
+            S_die_invalid_utf8(self->ptr, self->size);
+        }
+
+        retval = (i32_t)StrHelp_decode_utf8_char(self->ptr);
+        self->size -= consumed;
+        memmove(self->ptr, self->ptr + consumed, self->size);
+        return retval;
+    }
+}
+
+/* Remove up to `count` code points from the end of the string by shrinking
+ * `size`.  Returns the number of code points actually removed.
+ */
+size_t
+CB_chop(CharBuf *self, size_t count)
+{
+    char *const  top     = self->ptr;
+    const char  *cursor  = CBEND(self);
+    size_t       chopped = 0;
+
+    while (chopped < count) {
+        /* Step back one character; NULL means nothing is left to chop. */
+        cursor = StrHelp_back_utf8_char(cursor, top);
+        if (cursor == NULL) {
+            break;
+        }
+        self->size = cursor - top;
+        chopped++;
+    }
+
+    return chopped;
+}
+
+/* Count the code points in the string by hopping from lead byte to lead
+ * byte.
+ */
+size_t
+CB_length(CharBuf *self)
+{
+    const char *const end    = CBEND(self);
+    const char       *cursor = self->ptr;
+    size_t            total  = 0;
+
+    for ( ; cursor < end; total++) {
+        cursor += StrHelp_UTF8_SKIP[*(u8_t*)cursor];
+    }
+
+    return total;
+}
+
+/* Shorten the string so that it holds at most `count` code points.
+ * Returns the number of code points remaining.
+ *
+ * A ZombieCharBuf view is nipped `count` characters forward; whatever the
+ * view still covers afterwards is the excess, which is dropped from
+ * `size`.  The count is kept in a size_t (it was previously squeezed
+ * through a u32_t) so it is not narrowed where size_t is wider than 32
+ * bits.
+ */
+size_t
+CB_truncate(CharBuf *self, size_t count)
+{
+    size_t num_code_points;
+    ZombieCharBuf iterator = ZCB_make(self);
+    num_code_points = ZCB_Nip(&iterator, count);
+    self->size -= iterator.size;
+    return num_code_points;
+}
+
+/* Return the code point located `tick` code points from the top of the
+ * string, or 0 if the string is not that long.
+ */
+u32_t
+CB_code_point_at(CharBuf *self, size_t tick)
+{
+    char *const  end       = CBEND(self);
+    char        *cursor    = self->ptr;
+    size_t       remaining = tick;
+
+    while (cursor < end) {
+        if (remaining == 0) {
+            return StrHelp_decode_utf8_char(cursor);
+        }
+        remaining--;
+        cursor += StrHelp_UTF8_SKIP[*(u8_t*)cursor];
+    }
+
+    return 0;
+}
+
+/* Return the code point located `tick` code points back from the end of
+ * the string, or 0 if that position lies outside the string.
+ *
+ * A `tick` of 0 designates the position just past the last character.
+ * That is outside the string, so 0 is returned directly rather than
+ * decoding whatever byte happens to follow the string data.
+ */
+u32_t
+CB_code_point_from(CharBuf *self, size_t tick)
+{
+    size_t      count = 0;
+    const char *ptr   = CBEND(self);
+    char       *top   = self->ptr;
+
+    if (tick == 0) {
+        return 0;
+    }
+
+    for (count = 0; count < tick; count++) {
+        if (NULL == (ptr = StrHelp_back_utf8_char(ptr, top))) return 0;
+    }
+    return StrHelp_decode_utf8_char(ptr);
+}
+
+/* Return a new CharBuf holding a copy of the substring that starts
+ * `offset` code points in and runs for up to `len` code points.
+ */
+CharBuf*
+CB_substring(CharBuf *self, size_t offset, size_t len)
+{
+    ZombieCharBuf  view = ZCB_make(self);
+    char          *first;
+    char          *past_last;
+
+    /* Skip to the start of the substring and remember where it begins. */
+    ZCB_Nip(&view, offset);
+    first = view.ptr;
+
+    /* Skip over the substring itself to find where it ends. */
+    ZCB_Nip(&view, len);
+    past_last = view.ptr;
+
+    return CB_new_from_trusted_utf8(first, past_last - first);
+}
+
+/* qsort-style comparison.  `va` and `vb` each point to a CharBuf pointer.
+ * The strings are compared code point by code point; if one runs out
+ * first, the one with bytes left over sorts later.
+ */
+int 
+CB_compare(const void *va, const void *vb) 
+{
+    const CharBuf *a = *(const CharBuf**)va;
+    const CharBuf *b = *(const CharBuf**)vb;
+    ZombieCharBuf rest_a = ZCB_make(a);
+    ZombieCharBuf rest_b = ZCB_make(b);
+
+    while (rest_a.size != 0 && rest_b.size != 0) {
+        const i32_t code_point_a = ZCB_Nip_One(&rest_a);
+        const i32_t code_point_b = ZCB_Nip_One(&rest_b);
+        const i32_t difference   = code_point_a - code_point_b;
+        if (difference != 0) {
+            return difference;
+        }
+    }
+
+    /* All shared characters matched: decide by what is left over. */
+    if (rest_a.size < rest_b.size) {
+        return -1;
+    }
+    if (rest_a.size > rest_b.size) {
+        return 1;
+    }
+    return 0;
+}
+
+/* Boolean wrapper around CB_compare: 1 when the first string sorts
+ * strictly before the second, 0 otherwise. */
+bool_t
+CB_less_than(const void *va, const void *vb)
+{
+    const int order = CB_compare(va, vb);
+    return order < 0;
+}
+
+/* Overwrite the recorded byte size; the buffer contents are not touched. */
+void
+CB_set_size(CharBuf *self, size_t size)
+{
+    self->size = size;
+}
+/* Accessor for the recorded byte size. */
+size_t
+CB_get_size(CharBuf *self)
+{
+    return self->size;
+}
+/* Accessor for the raw buffer, viewed as unsigned bytes. */
+u8_t*
+CB_get_ptr8(CharBuf *self)
+{
+    return (u8_t*)self->ptr;
+}
+
+/*****************************************************************/
+
+/* Validating constructor: reject the input via S_die_invalid_utf8 when
+ * StrHelp_utf8_valid fails, otherwise defer to the trusted constructor. */
+ViewCharBuf*
+ViewCB_new_from_utf8(const char *utf8, size_t size)
+{
+    if (!StrHelp_utf8_valid(utf8, size)) {
+        S_die_invalid_utf8(utf8, size);
+    }
+
+    return ViewCB_new_from_trusted_utf8(utf8, size);
+}
+
+/* Constructor that performs no UTF-8 validation: make a blank
+ * VIEWCHARBUF object and initialize it over the supplied bytes. */
+ViewCharBuf*
+ViewCB_new_from_trusted_utf8(const char *utf8, size_t size)
+{
+    ViewCharBuf *blank = (ViewCharBuf*)VTable_Make_Obj(VIEWCHARBUF);
+
+    return ViewCB_init(blank, utf8, size);
+}
+
+/* Point the view at the caller's bytes.  Nothing is copied, and cap is
+ * set to 0. */
+ViewCharBuf*
+ViewCB_init(ViewCharBuf *self, const char *utf8, size_t size)
+{
+    self->cap  = 0;
+    self->size = size;
+    self->ptr  = (char*)utf8;
+
+    return self;
+}
+
+/* Destructor for ViewCharBuf.  The bytes behind self->ptr are left alone
+ * here; only the superclass destructor chain is run. */
+void
+ViewCB_destroy(ViewCharBuf *self)
+{
+    /* Note that we do not free self->ptr, and that we invoke the
+     * SUPER_DESTROY with CHARBUF instead of VIEWCHARBUF. */
+    SUPER_DESTROY(self, CHARBUF);
+}
+
+/* Re-aim the view at another CharBuf's bytes.  No copy is made and no
+ * validation is performed. */
+void
+ViewCB_assign(ViewCharBuf *self, const CharBuf *other)
+{
+    self->size = other->size;
+    self->ptr  = other->ptr;
+}
+
+/* Re-aim the view at a raw UTF-8 string, after checking its validity
+ * with StrHelp_utf8_valid. */
+void
+ViewCB_assign_str(ViewCharBuf *self, const char *utf8, size_t size)
+{
+    if (!StrHelp_utf8_valid(utf8, size)) {
+        S_die_invalid_utf8(utf8, size);
+    }
+
+    self->size = size;
+    self->ptr  = (char*)utf8;
+}
+
+/* Advance the start of the view past any leading whitespace characters.
+ * Returns how many characters were skipped; the view is left untouched
+ * when that number is 0. */
+u32_t
+ViewCB_trim_top(ViewCharBuf *self)
+{
+    char  *cursor      = self->ptr;
+    char  *const stop  = CBEND(self);
+    u32_t  num_trimmed = 0;
+
+    for ( ; cursor < stop; num_trimmed++) {
+        const u32_t code_point = StrHelp_decode_utf8_char(cursor);
+        if (!StrHelp_is_whitespace(code_point)) { break; }
+        cursor += StrHelp_UTF8_SKIP[*(u8_t*)cursor];
+    }
+
+    if (num_trimmed == 0) { return 0; }
+
+    self->size = stop - cursor;
+    self->ptr  = cursor;
+    return num_trimmed;
+}
+
+/* Drop up to `count` characters from the front of the view by moving its
+ * start pointer forward.  Returns the number of characters dropped, which
+ * is less than `count` when the view runs out first. */
+size_t
+ViewCB_nip(ViewCharBuf *self, size_t count)
+{
+    char   *cursor     = self->ptr;
+    char   *const stop = CBEND(self);
+    size_t  num_nipped = 0;
+
+    while (cursor < stop && num_nipped < count) {
+        cursor += StrHelp_UTF8_SKIP[*(u8_t*)cursor];
+        num_nipped++;
+    }
+
+    self->ptr  = cursor;
+    self->size = stop - cursor;
+    return num_nipped;
+}
+
+/* Remove the first character from the view and return its code point.
+ * An empty view yields 0 and is left unchanged. */
+i32_t
+ViewCB_nip_one(ViewCharBuf *self)
+{
+    i32_t  code_point;
+    size_t width;
+
+    if (self->size == 0) { return 0; }
+
+    code_point  = (i32_t)StrHelp_decode_utf8_char(self->ptr);
+    width       = StrHelp_UTF8_SKIP[*(u8_t*)self->ptr];
+    self->ptr  += width;
+    self->size -= width;
+
+    return code_point;
+}
+
+/* Growing is not supported for a ViewCharBuf: this unconditionally
+ * throws an ERR naming the offending object. */
+void
+ViewCB_grow(ViewCharBuf *self, size_t size)
+{
+    /* Only `size` is unused -- `self` is consumed by the error message, so
+     * marking it unused would be misleading. */
+    UNUSED_VAR(size);
+    THROW(ERR, "Can't grow a ViewCharBuf ('%o')", self);
+}
+
+/*****************************************************************/
+
+/* Build a ZombieCharBuf by value over the supplied bytes.  The struct is
+ * filled in by hand: ref count 1, the ZOMBIECHARBUF vtable, cap 0, and a
+ * pointer/size pair that aliases the caller's data. */
+ZombieCharBuf
+ZCB_make_str(const char *ptr, size_t size)
+{
+    ZombieCharBuf zombie;
+
+    zombie.ptr       = (char*)ptr;
+    zombie.size      = size;
+    zombie.cap       = 0;
+    zombie.vtable    = ZOMBIECHARBUF;
+    zombie.ref.count = 1;
+
+    return zombie;
+}
+
+/* Build a ZombieCharBuf that aliases the bytes of an existing CharBuf. */
+ZombieCharBuf
+ZCB_make(const CharBuf *source)
+{
+    const char   *bytes = source->ptr;
+    const size_t  len   = source->size;
+
+    return ZCB_make_str(bytes, len);
+}
+
+/* Destroying a ZombieCharBuf is treated as an error: this always throws
+ * an ERR naming the object. */
+void
+ZCB_destroy(ZombieCharBuf *self)
+{
+    THROW(ERR, "Can't destroy a ZombieCharBuf ('%o')", self);
+}
+
+/* Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+

Propchange: lucene/lucy/trunk/core/Lucy/Object/CharBuf.c
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.bp
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.bp?rev=814956&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.bp (added)
+++ lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.bp Tue Sep 15 01:29:17 2009
@@ -0,0 +1,25 @@
+parcel Lucy;
+
+/** 
+ * Test batch for Lucy::Object::CharBuf.
+ */ 
+
+class Lucy::Test::Object::TestCharBuf cnick TestCB
+    extends Lucy::Object::Obj {
+
+    /** Entry point for the test batch.
+     */
+    inert void
+    run_tests();
+}
+
+/* Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+

Propchange: lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.bp
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.c
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.c?rev=814956&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.c (added)
+++ lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.c Tue Sep 15 01:29:17 2009
@@ -0,0 +1,398 @@
+#define C_LUCY_TESTCHARBUF
+#include "Lucy/Util/ToolSet.h"
+#include <stdarg.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "Lucy/Test.h"
+#include "Lucy/Test/Object/TestCharBuf.h"
+
+/* UTF-8 encoding of U+263A (the tests below append it with
+ * CB_Cat_Char(got, 0x263A)), NUL-terminated so it can be used with %s.
+ * smiley_len is its length in bytes, excluding the terminator. */
+static char smiley[] = { (char)0xE2, (char)0x98, (char)0xBA, 0 };
+static u32_t smiley_len = 3;
+
+/* Convenience constructor: wrap a NUL-terminated C string in a new
+ * CharBuf via CB_new_from_utf8.  The argument is only read, and every
+ * caller passes a string literal, so it is taken as const. */
+static CharBuf*
+S_get_cb(const char *string)
+{
+    return CB_new_from_utf8(string, strlen(string));
+}
+
+/* Each of the four concatenation entry points must turn its starting
+ * string into "a" followed by the smiley. */
+static void
+test_Cat(TestBatch *batch)
+{
+    CharBuf *expected = CB_newf("a%s", smiley);
+    CharBuf *actual   = S_get_cb("");
+
+    /* Cat: append a whole CharBuf onto an empty one. */
+    CB_Cat(actual, expected);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "Cat");
+    DECREF(actual);
+
+    /* Cat_Char: append a single code point. */
+    actual = S_get_cb("a");
+    CB_Cat_Char(actual, 0x263A);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "Cat_Char");
+    DECREF(actual);
+
+    /* Cat_Str: append raw bytes. */
+    actual = S_get_cb("a");
+    CB_Cat_Str(actual, smiley, smiley_len);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "Cat_Str");
+    DECREF(actual);
+
+    /* Cat_Trusted_Str: append raw bytes through the trusted variant. */
+    actual = S_get_cb("a");
+    CB_Cat_Trusted_Str(actual, smiley, smiley_len);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "Cat_Trusted_Str");
+    DECREF(actual);
+
+    DECREF(expected);
+}
+
+/* Mimic, Mimic_Str and Clone must each produce a CharBuf equal to "foo". */
+static void
+test_Mimic_and_Clone(TestBatch *batch)
+{
+    CharBuf *expected = S_get_cb("foo");
+    CharBuf *actual   = S_get_cb("bar");
+
+    CB_Mimic(actual, (Obj*)expected);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "Mimic");
+    DECREF(actual);
+
+    actual = S_get_cb("bar");
+    CB_Mimic_Str(actual, "foo", 3);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "Mimic_Str");
+    DECREF(actual);
+
+    actual = CB_Clone(expected);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "Clone");
+    DECREF(actual);
+
+    DECREF(expected);
+}
+
+/* Walk a six-character mixed ASCII/multi-byte string and check every
+ * position from both directions: Code_Point_At counting from the front,
+ * Code_Point_From counting back from the end. */
+static void
+test_Code_Point_At_and_From(TestBatch *batch)
+{
+    u32_t code_points[] = { 'a', 0x263A, 0x263A, 'b', 0x263A, 'c' }; 
+    u32_t num_code_points = sizeof(code_points) / sizeof(u32_t);
+    CharBuf *string = CB_newf("a%s%sb%sc", smiley, smiley, smiley);
+    u32_t i;
+
+    for (i = 0; i < num_code_points; i++) {
+        u32_t from = num_code_points - i - 1;
+        ASSERT_INT_EQ(batch, CB_Code_Point_At(string, i), code_points[i],
+            "Code_Point_At %ld", (long)i);
+        /* Code_Point_From steps back `tick` characters from the end, so
+         * tick i + 1 lands on index num_code_points - i - 1.  The previous
+         * version called Code_Point_At here and never exercised
+         * Code_Point_From at all. */
+        ASSERT_INT_EQ(batch, CB_Code_Point_From(string, i + 1), 
+            code_points[from], "Code_Point_From %ld", (long)(i + 1));
+    }
+
+    DECREF(string);
+}
+
+/* Three code points starting at offset 2 of "a<s><s>b<s>c" must come
+ * back as "<s>b<s>" (where <s> is the smiley). */
+static void
+test_SubString(TestBatch *batch)
+{
+    CharBuf *source   = CB_newf("a%s%sb%sc", smiley, smiley, smiley);
+    CharBuf *expected = CB_newf("%sb%s", smiley, smiley);
+    CharBuf *actual   = CB_SubString(source, 2, 3);
+
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "SubString");
+
+    DECREF(expected);
+    DECREF(actual);
+    DECREF(source);
+}
+
+/* Nip removes characters from the front, Chop from the back; both are
+ * checked against a hand-built expected string. */
+static void
+test_Nip_and_Chop(TestBatch *batch)
+{
+    CharBuf *expected;
+    CharBuf *actual;
+
+    /* Drop the first two characters. */
+    expected = CB_newf("%sb%sc", smiley, smiley);
+    actual   = CB_newf("a%s%sb%sc", smiley, smiley, smiley);
+    CB_Nip(actual, 2);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "Nip");
+    DECREF(expected);
+    DECREF(actual);
+
+    /* Drop the last three characters. */
+    expected = CB_newf("a%s%s", smiley, smiley);
+    actual   = CB_newf("a%s%sb%sc", smiley, smiley, smiley);
+    CB_Chop(actual, 3);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "Chop");
+    DECREF(expected);
+    DECREF(actual);
+}
+
+
+/* Truncating "a<s><s>b<s>c" to two code points must leave "a<s>" (where
+ * <s> is the smiley). */
+static void
+test_Truncate(TestBatch *batch)
+{
+    /* One %s, one argument: the format previously received a stray second
+     * smiley that no conversion consumed. */
+    CharBuf *wanted = CB_newf("a%s", smiley);
+    CharBuf *got    = CB_newf("a%s%sb%sc", smiley, smiley, smiley);
+    CB_Truncate(got, 2);
+    ASSERT_TRUE(batch, CB_Equals(wanted, (Obj*)got), "Truncate");
+    DECREF(wanted);
+    DECREF(got);
+}
+
+/* Exercise Trim_Top, Trim_Tail and Trim on a smiley wrapped on both sides
+ * in every code point from the `spaces` table.  Each trim call is made
+ * twice: the first must report success, the second (nothing left to trim)
+ * must report failure. */
+static void
+test_Trim(TestBatch *batch)
+{
+    /* Code points the trim routines are expected to treat as whitespace. */
+    u32_t spaces[] = { 
+        ' ',    '\t',   '\r',   '\n',   0x000B, 0x000C, 0x000D, 0x0085,
+        0x00A0, 0x1680, 0x180E, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 
+        0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x2028, 0x2029, 
+        0x202F, 0x205F, 0x3000
+    };
+    u32_t num_spaces = sizeof(spaces) / sizeof(u32_t);
+    u32_t i;
+    CharBuf *got = CB_new(0);
+
+    /* Surround a smiley with lots of whitespace. */
+    for (i = 0; i < num_spaces; i++) { CB_Cat_Char(got, spaces[i]); }
+    CB_Cat_Char(got, 0x263A);
+    for (i = 0; i < num_spaces; i++) { CB_Cat_Char(got, spaces[i]); }
+
+    /* Trim each end separately; only the bare smiley should remain. */
+    ASSERT_TRUE(batch, CB_Trim_Top(got), "Trim_Top returns true on success");
+    ASSERT_FALSE(batch, CB_Trim_Top(got), 
+        "Trim_Top returns false on failure");
+    ASSERT_TRUE(batch, CB_Trim_Tail(got), "Trim_Tail returns true on success");
+    ASSERT_FALSE(batch, CB_Trim_Tail(got), 
+        "Trim_Tail returns false on failure");
+    ASSERT_TRUE(batch, CB_Equals_Str(got, smiley, smiley_len), 
+        "Trim_Top and Trim_Tail worked");
+
+    /* Build the spacey smiley again. */
+    CB_Truncate(got, 0);
+    for (i = 0; i < num_spaces; i++) { CB_Cat_Char(got, spaces[i]); }
+    CB_Cat_Char(got, 0x263A);
+    for (i = 0; i < num_spaces; i++) { CB_Cat_Char(got, spaces[i]); }
+
+    /* Trim both ends in one call this time. */
+    ASSERT_TRUE(batch, CB_Trim(got), "Trim returns true on success");
+    ASSERT_FALSE(batch, CB_Trim(got), "Trim returns false on failure");
+    ASSERT_TRUE(batch, CB_Equals_Str(got, smiley, smiley_len), 
+        "Trim worked");
+
+    DECREF(got);
+}
+
+/* To_F64 must parse "1.5" and "-1.5" to within 0.001 of the true value. */
+static void
+test_To_F64(TestBatch *batch)
+{
+    CharBuf *numeric = S_get_cb("1.5");
+    double   delta;
+
+    delta = 1.5 - CB_To_F64(numeric);
+    if (delta < 0) { delta = 0 - delta; }
+    ASSERT_TRUE(batch, delta < 0.001, "To_F64");
+
+    CB_setf(numeric, "-1.5");
+    delta = 1.5 + CB_To_F64(numeric);
+    if (delta < 0) { delta = 0 - delta; }
+    ASSERT_TRUE(batch, delta < 0.001, "To_F64 negative");
+
+    DECREF(numeric);
+}
+
+/* To_I64 must parse both a positive and a negative decimal integer. */
+static void
+test_To_I64(TestBatch *batch)
+{
+    CharBuf *numeric = S_get_cb("10");
+
+    ASSERT_TRUE(batch, CB_To_I64(numeric) == 10, "To_I64");
+
+    CB_setf(numeric, "-10");
+    ASSERT_TRUE(batch, CB_To_I64(numeric) == -10, "To_I64 negative");
+
+    DECREF(numeric);
+}
+
+
+/* catf with %s must splice in a C string. */
+static void
+test_vcatf_s(TestBatch *batch)
+{
+    CharBuf *actual   = S_get_cb("foo ");
+    CharBuf *expected = S_get_cb("foo bar bizzle baz");
+
+    CB_catf(actual, "bar %s baz", "bizzle");
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "%%s");
+
+    DECREF(actual);
+    DECREF(expected);
+}
+
+/* catf with %s and a null C string must render "[NULL]". */
+static void
+test_vcatf_null_string(TestBatch *batch)
+{
+    CharBuf *wanted = S_get_cb("foo bar [NULL] baz");
+    CharBuf *got = S_get_cb("foo ");
+    /* A bare NULL may be a plain integer 0, which is not a valid way to
+     * pass a pointer through a variadic argument list; cast it to the
+     * pointer type that %s reads. */
+    CB_catf(got, "bar %s baz", (char*)NULL);
+    ASSERT_TRUE(batch, CB_Equals(wanted, (Obj*)got), "%%s NULL");
+    DECREF(wanted);
+    DECREF(got);
+}
+
+/* catf with %o must splice in the contents of a CharBuf object. */
+static void
+test_vcatf_cb(TestBatch *batch)
+{
+    CharBuf *expected = S_get_cb("foo bar ZEKE baz");
+    CharBuf *insert   = S_get_cb("ZEKE");
+    CharBuf *actual   = S_get_cb("foo ");
+
+    CB_catf(actual, "bar %o baz", insert);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "%%o CharBuf");
+
+    DECREF(insert);
+    DECREF(expected);
+    DECREF(actual);
+}
+
+/* catf with %o and a null object must render "[NULL]". */
+static void
+test_vcatf_null_obj(TestBatch *batch)
+{
+    CharBuf *wanted = S_get_cb("foo bar [NULL] baz");
+    CharBuf *got = S_get_cb("foo ");
+    /* A bare NULL may be a plain integer 0, which is not a valid way to
+     * pass a pointer through a variadic argument list; cast it to an
+     * object pointer. */
+    CB_catf(got, "bar %o baz", (Obj*)NULL);
+    ASSERT_TRUE(batch, CB_Equals(wanted, (Obj*)got), "%%o NULL");
+    DECREF(wanted);
+    DECREF(got);
+}
+
+/* catf with %i8 must render a negative 8-bit integer. */
+static void
+test_vcatf_i8(TestBatch *batch)
+{
+    i8_t     value    = -3;
+    CharBuf *expected = S_get_cb("foo bar -3 baz");
+    CharBuf *actual   = S_get_cb("foo ");
+
+    CB_catf(actual, "bar %i8 baz", value);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "%%i8");
+
+    DECREF(expected);
+    DECREF(actual);
+}
+
+/* catf with %i32 must render a negative 32-bit integer. */
+static void
+test_vcatf_i32(TestBatch *batch)
+{
+    i32_t    value    = -100000;
+    CharBuf *expected = S_get_cb("foo bar -100000 baz");
+    CharBuf *actual   = S_get_cb("foo ");
+
+    CB_catf(actual, "bar %i32 baz", value);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "%%i32");
+
+    DECREF(expected);
+    DECREF(actual);
+}
+
+/* catf with %i64 must render a negative value that needs 64 bits. */
+static void
+test_vcatf_i64(TestBatch *batch)
+{
+    i64_t    value    = I64_C(-5000000000);
+    CharBuf *expected = S_get_cb("foo bar -5000000000 baz");
+    CharBuf *actual   = S_get_cb("foo ");
+
+    CB_catf(actual, "bar %i64 baz", value);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "%%i64");
+
+    DECREF(expected);
+    DECREF(actual);
+}
+
+/* catf with %u8 must render an unsigned 8-bit integer. */
+static void
+test_vcatf_u8(TestBatch *batch)
+{
+    u8_t     value    = 3;
+    CharBuf *expected = S_get_cb("foo bar 3 baz");
+    CharBuf *actual   = S_get_cb("foo ");
+
+    CB_catf(actual, "bar %u8 baz", value);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "%%u8");
+
+    DECREF(expected);
+    DECREF(actual);
+}
+
+/* catf with %u32 must render an unsigned 32-bit integer. */
+static void
+test_vcatf_u32(TestBatch *batch)
+{
+    u32_t    value    = 100000;
+    CharBuf *expected = S_get_cb("foo bar 100000 baz");
+    CharBuf *actual   = S_get_cb("foo ");
+
+    CB_catf(actual, "bar %u32 baz", value);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "%%u32");
+
+    DECREF(expected);
+    DECREF(actual);
+}
+
+/* catf with %u64 must render an unsigned value that needs 64 bits. */
+static void
+test_vcatf_u64(TestBatch *batch)
+{
+    u64_t    value    = U64_C(5000000000);
+    CharBuf *expected = S_get_cb("foo bar 5000000000 baz");
+    CharBuf *actual   = S_get_cb("foo ");
+
+    CB_catf(actual, "bar %u64 baz", value);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "%%u64");
+
+    DECREF(expected);
+    DECREF(actual);
+}
+
+/* catf with %f64 must agree with what sprintf's %g produces for the same
+ * value. */
+static void
+test_vcatf_f64(TestBatch *batch)
+{
+    char     scratch[64];
+    float    value  = 1.3f;
+    CharBuf *actual = S_get_cb("foo ");
+    CharBuf *expected;
+
+    sprintf(scratch, "foo bar %g baz", value);
+    expected = CB_new_from_trusted_utf8(scratch, strlen(scratch));
+
+    CB_catf(actual, "bar %f64 baz", value);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "%%f64");
+
+    DECREF(expected);
+    DECREF(actual);
+}
+
+/* catf with %x32 must agree with sprintf's zero-padded 8-digit hex
+ * rendering of the same value. */
+static void
+test_vcatf_x32(TestBatch *batch)
+{
+    CharBuf *wanted;
+    char buf[64];
+    unsigned long num = I32_MAX;
+    CharBuf *got = S_get_cb("foo ");
+    /* %lx matches the unsigned long argument whatever the width of long,
+     * and the value fits in eight hex digits, so no SIZEOF_LONG /
+     * SIZEOF_INT switch is needed.  The old switch left buf uninitialized
+     * when neither branch matched. */
+    sprintf(buf, "foo bar %.8lx baz", num);
+    wanted = CB_new_from_trusted_utf8(buf, strlen(buf));
+    CB_catf(got, "bar %x32 baz", (u32_t)num);
+    ASSERT_TRUE(batch, CB_Equals(wanted, (Obj*)got), "%%x32");
+    DECREF(wanted);
+    DECREF(got);
+}
+
+/* Entry point for the CharBuf test batch: declare the plan, run every
+ * test function in a fixed order, then tear the batch down. */
+void
+TestCB_run_tests()
+{
+    /* 47 is the planned number of assertions; it has to be kept in step
+     * with the assertions made by the test functions called below. */
+    TestBatch *batch = Test_new_batch("TestCharBuf", 47, NULL);
+    PLAN(batch);
+
+    test_vcatf_s(batch);
+    test_vcatf_null_string(batch);
+    test_vcatf_cb(batch);
+    test_vcatf_null_obj(batch);
+    test_vcatf_i8(batch);
+    test_vcatf_i32(batch);
+    test_vcatf_i64(batch);
+    test_vcatf_u8(batch);
+    test_vcatf_u32(batch);
+    test_vcatf_u64(batch);
+    test_vcatf_f64(batch);
+    test_vcatf_x32(batch);
+    test_Cat(batch);
+    test_Mimic_and_Clone(batch);
+    test_Code_Point_At_and_From(batch);
+    test_SubString(batch);
+    test_Nip_and_Chop(batch);
+    test_Truncate(batch);
+    test_Trim(batch);
+    test_To_F64(batch);
+    test_To_I64(batch);
+
+    batch->destroy(batch);
+}
+
+/* Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+

Propchange: lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.c
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/lucy/trunk/perl/lib/Lucy/Object/CharBuf.pm
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/perl/lib/Lucy/Object/CharBuf.pm?rev=814956&view=auto
==============================================================================
--- lucene/lucy/trunk/perl/lib/Lucy/Object/CharBuf.pm (added)
+++ lucene/lucy/trunk/perl/lib/Lucy/Object/CharBuf.pm Tue Sep 15 01:29:17 2009
@@ -0,0 +1,82 @@
+use Lucy;
+
+1;
+
+__END__
+
+__BINDING__
+
+# Hand-written XS for the Lucy::Object::CharBuf and
+# Lucy::Object::ViewCharBuf packages, held verbatim in a non-interpolating
+# heredoc.  It supplies new(), _clone() and to_perl() for CharBuf, and
+# _new() for ViewCharBuf.
+my $xs_code = <<'END_XS_CODE';
+MODULE = Lucy     PACKAGE = Lucy::Object::CharBuf
+
+SV*
+new(either_sv, sv)
+    SV *either_sv;
+    SV *sv;
+CODE:
+{
+    STRLEN size;
+    char *ptr = SvPVutf8(sv, size);
+    lucy_CharBuf *self = (lucy_CharBuf*)XSBind_new_blank_obj(either_sv);
+    lucy_CB_init(self, size);
+    Lucy_CB_Cat_Trusted_Str(self, ptr, size);
+    RETVAL = LUCY_OBJ_TO_SV_NOINC(self);
+}
+OUTPUT: RETVAL
+
+SV*
+_clone(self)
+    lucy_CharBuf *self;
+CODE:
+    RETVAL = LUCY_OBJ_TO_SV_NOINC(lucy_CB_clone(self));
+OUTPUT: RETVAL
+
+SV*
+to_perl(self)
+    lucy_CharBuf *self;
+CODE:
+    RETVAL = XSBind_cb_to_sv(self);
+OUTPUT: RETVAL
+
+MODULE = Lucy     PACKAGE = Lucy::Object::ViewCharBuf
+
+SV*
+_new(unused, sv)
+    SV *unused;
+    SV *sv;
+CODE:
+{
+    STRLEN size;
+    char *ptr = SvPVutf8(sv, size);
+    lucy_ViewCharBuf *self 
+        = lucy_ViewCB_new_from_trusted_utf8(ptr, size);
+    CHY_UNUSED_VAR(unused);
+    RETVAL = LUCY_OBJ_TO_SV_NOINC(self);
+}
+OUTPUT: RETVAL
+END_XS_CODE
+
+# Register the Perl binding for Lucy::Object::CharBuf, attaching the
+# hand-written XS held in $xs_code.
+Boilerplater::Binding::Perl::Class->register(
+    parcel       => "Lucy",
+    class_name   => "Lucy::Object::CharBuf",
+    xs_code      => $xs_code,
+);
+
+__COPYRIGHT__
+
+    /**
+     * Copyright 2009 The Apache Software Foundation
+     *
+     * Licensed under the Apache License, Version 2.0 (the "License");
+     * you may not use this file except in compliance with the License.
+     * You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+     * implied.  See the License for the specific language governing
+     * permissions and limitations under the License.
+     */
+

Propchange: lucene/lucy/trunk/perl/lib/Lucy/Object/CharBuf.pm
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/lucy/trunk/perl/t/core/029-charbuf.t
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/perl/t/core/029-charbuf.t?rev=814956&view=auto
==============================================================================
--- lucene/lucy/trunk/perl/t/core/029-charbuf.t (added)
+++ lucene/lucy/trunk/perl/t/core/029-charbuf.t Tue Sep 15 01:29:17 2009
@@ -0,0 +1,6 @@
+# Thin Perl wrapper that runs the "TestCharBuf" batch via Lucy::Test.
+use strict;
+use warnings;
+
+use Lucy::Test;
+Lucy::Test::run_tests("TestCharBuf");
+

Propchange: lucene/lucy/trunk/perl/t/core/029-charbuf.t
------------------------------------------------------------------------------
    svn:eol-style = native