You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by ma...@apache.org on 2009/09/15 03:29:17 UTC
svn commit: r814956 - in /lucene/lucy/trunk: core/Lucy/Object/CharBuf.bp
core/Lucy/Object/CharBuf.c core/Lucy/Test/Object/TestCharBuf.bp
core/Lucy/Test/Object/TestCharBuf.c perl/lib/Lucy/Object/CharBuf.pm
perl/t/core/029-charbuf.t
Author: marvin
Date: Tue Sep 15 01:29:17 2009
New Revision: 814956
URL: http://svn.apache.org/viewvc?rev=814956&view=rev
Log:
Commit LUCY-44, adding CharBuf, ViewCharBuf and ZombieCharBuf.
Added:
lucene/lucy/trunk/core/Lucy/Object/CharBuf.bp (with props)
lucene/lucy/trunk/core/Lucy/Object/CharBuf.c (with props)
lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.bp (with props)
lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.c (with props)
lucene/lucy/trunk/perl/lib/Lucy/Object/CharBuf.pm (with props)
lucene/lucy/trunk/perl/t/core/029-charbuf.t (with props)
Added: lucene/lucy/trunk/core/Lucy/Object/CharBuf.bp
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Object/CharBuf.bp?rev=814956&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Object/CharBuf.bp (added)
+++ lucene/lucy/trunk/core/Lucy/Object/CharBuf.bp Tue Sep 15 01:29:17 2009
@@ -0,0 +1,373 @@
+parcel Lucy;
+
+__C__
+#include <stdarg.h>
+__END_C__
+
+/**
+ * Growable buffer holding Unicode characters.
+ */
+
+class Lucy::Object::CharBuf cnick CB
+ extends Lucy::Object::Obj {
+
+ char *ptr;
+ size_t size;
+ size_t cap; /* allocated bytes, including terminating null */
+
+ inert incremented CharBuf*
+ new(size_t size);
+
+ inert CharBuf*
+ init(CharBuf *self, size_t size);
+
+ /** Return a new CharBuf which holds a copy of the passed-in string.
+ * Check for UTF-8 validity.
+ */
+ inert incremented CharBuf*
+ new_from_utf8(const char *utf8, size_t size);
+
+ /** Return a new CharBuf which holds a copy of the passed-in string. No
+ * validity checking is performed.
+ */
+ inert incremented CharBuf*
+ new_from_trusted_utf8(const char *utf8, size_t size);
+
+ /** Return a pointer to a new CharBuf which assumes ownership of the
+ * passed-in string. Check validity of supplied UTF-8.
+ */
+ inert incremented CharBuf*
+ new_steal_str(char *ptr, size_t size, size_t cap);
+
+ /** Return a pointer to a new CharBuf which assumes ownership of the
+ * passed-in string. Do not check validity of supplied UTF-8.
+ */
+ inert incremented CharBuf*
+ new_steal_from_trusted_str(char *ptr, size_t size, size_t cap);
+
+ /** Return a pointer to a new CharBuf which contains formatted data
+ * expanded according to CB_VCatF.
+ *
+ * Note: allowing a user-supplied format <code>pattern</code> string is a
+ * security hole and must be avoided.
+ */
+ inert incremented CharBuf*
+ newf(const char *pattern, ...);
+
+ /** Perform lexical comparison of two CharBufs, with level of indirection
+ * set to please qsort and friends.
+ */
+ inert int
+ compare(const void *va, const void *vb);
+
+ /** Perform lexical comparison of two CharBufs, with level of indirection
+ * set to please qsort and friends, and return true if <code>a</code> is
+ * less than <code>b</code>.
+ */
+ inert bool_t
+ less_than(const void *va, const void *vb);
+
+ public void
+ Mimic(CharBuf *self, Obj *other);
+
+ void
+ Mimic_Str(CharBuf *self, const char *ptr, size_t size);
+
+ /** Concatenate the passed-in string onto the end of the CharBuf.
+ */
+ void
+ Cat_Str(CharBuf *self, const char *ptr, size_t size);
+
+ /** Concatenate the contents of <code>other</code> onto the end of the
+ * caller.
+ */
+ void
+ Cat(CharBuf *self, const CharBuf *other);
+
+ /** Concatenate formatted arguments. Similar to the printf family, but
+ * only accepts minimal options (just enough for decent error messages).
+ *
+ * Objects: %o
+ * char*: %s
+ * integers: %i8 %i32 %i64 %u8 %u32 %u64
+ * floats: %f64
+ * hex: %x32
+ *
+ * Note that all Lucy Objects, including CharBufs, are printed via
+ * %o (which invokes Obj_To_String()).
+ */
+ void
+ VCatF(CharBuf *self, const char *pattern, va_list args);
+
+ /** Invokes CB_VCatF to concatenate formatted arguments. Note that this
+ * is only a function and not a method.
+ */
+ inert void
+ catf(CharBuf *self, const char *pattern, ...);
+
+ /** Replaces the contents of the CharBuf using formatted arguments.
+ */
+ inert void
+ setf(CharBuf *self, const char *pattern, ...);
+
+ /** Concatenate one Unicode character onto the end of the CharBuf.
+ */
+ void
+ Cat_Char(CharBuf *self, u32_t code_point);
+
+ /** Replace all instances of one character with the other. For now, both
+ * the source and replacement code points must be ASCII.
+ */
+ i32_t
+ Swap_Chars(CharBuf *self, u32_t match, u32_t replacement);
+
+ public i64_t
+ To_I64(CharBuf *self);
+
+ /** Extract a 64-bit integer from a variable-base stringified version.
+ */
+ i64_t
+ BaseX_To_I64(CharBuf *self, u32_t base);
+
+ public double
+ To_F64(CharBuf *self);
+
+ /** Assign more memory to the CharBuf, if it doesn't already have enough
+ * room to hold a string of <code>size</code> bytes. Cannot shrink the
+ * allocation.
+ *
+ * @return a pointer to the raw buffer.
+ */
+ char*
+ Grow(CharBuf *self, size_t size);
+
+ /** Test whether the CharBuf starts with the content of another.
+ */
+ bool_t
+ Starts_With(CharBuf *self, const CharBuf *prefix);
+
+ /** Test whether the CharBuf starts with the passed-in string.
+ */
+ bool_t
+ Starts_With_Str(CharBuf *self, const char *prefix, size_t size);
+
+ /** Test whether the CharBuf ends with the content of another.
+ */
+ bool_t
+ Ends_With(CharBuf *self, const CharBuf *postfix);
+
+ /** Test whether the CharBuf ends with the passed-in string.
+ */
+ bool_t
+ Ends_With_Str(CharBuf *self, const char *postfix, size_t size);
+
+ /** Test whether the CharBuf matches the passed-in string.
+ */
+ bool_t
+ Equals_Str(CharBuf *self, const char *ptr, size_t size);
+
+ /** Return the number of Unicode code points in the object's string.
+ */
+ size_t
+ Length(CharBuf *self);
+
+ /** Set the CharBuf's <code>size</code> attribute.
+ */
+ void
+ Set_Size(CharBuf *self, size_t size);
+
+ /** Get the CharBuf's <code>size</code> attribute.
+ */
+ size_t
+ Get_Size(CharBuf *self);
+
+ /** Return the internal backing array for the CharBuf if its internal
+ * encoding is UTF-8. If it is not encoded as UTF-8 throw an exception.
+ */
+ u8_t*
+ Get_Ptr8(CharBuf *self);
+
+ /** Return a fresh copy of the string data in a CharBuf with an internal
+ * encoding of UTF-8.
+ */
+ CharBuf*
+ To_CB8(CharBuf *self);
+
+ public incremented CharBuf*
+ Clone(CharBuf *self);
+
+ public void
+ Destroy(CharBuf *self);
+
+ public bool_t
+ Equals(CharBuf *self, Obj *other);
+
+ public i32_t
+ Compare_To(CharBuf *self, Obj *other);
+
+ public i32_t
+ Hash_Code(CharBuf *self);
+
+ public incremented CharBuf*
+ To_String(CharBuf *self);
+
+ public incremented CharBuf*
+ Load(CharBuf *self, Obj *dump);
+
+ /** Remove Unicode whitespace characters from both top and tail.
+ */
+ u32_t
+ Trim(CharBuf *self);
+
+ /** Remove leading Unicode whitespace.
+ */
+ u32_t
+ Trim_Top(CharBuf *self);
+
+ /** Remove trailing Unicode whitespace.
+ */
+ u32_t
+ Trim_Tail(CharBuf *self);
+
+ /** Remove characters (measured in code points) from the top of the
+ * CharBuf. Returns the number nipped.
+ */
+ size_t
+ Nip(CharBuf *self, size_t count);
+
+ /** Remove one character from the top of the CharBuf. Returns the code
+ * point, or 0 if the string was empty.
+ */
+ i32_t
+ Nip_One(CharBuf *self);
+
+ /** Remove characters (measured in code points) from the end of the
+ * CharBuf. Returns the number chopped.
+ */
+ size_t
+ Chop(CharBuf *self, size_t count);
+
+ /** Truncate the CharBuf so that it contains no more than
+ * <code>count</code> characters.
+ *
+ * @param count Maximum new length, in Unicode code points.
+ * @return The number of code points left in the string after truncation.
+ */
+ size_t
+ Truncate(CharBuf *self, size_t count);
+
+ /** Return the Unicode code point at the specified number of code points
+ * in. Return 0 if the string length is exceeded. (XXX It would be
+ * better to throw an exception, but that's not practical with UTF-8 and
+ * no cached length.)
+ */
+ u32_t
+ Code_Point_At(CharBuf *self, size_t tick);
+
+ /** Return the Unicode code point at the specified number of code points
+ * counted backwards from the end of the string. Return 0 if outside the
+ * string.
+ */
+ u32_t
+ Code_Point_From(CharBuf *self, size_t tick);
+
+ /** Return a newly allocated CharBuf containing a copy of the indicated
+ * substring.
+ * @param offset Offset from the top, in code points.
+ * @param len The desired length of the substring, in code points.
+ */
+ incremented CharBuf*
+ SubString(CharBuf *self, size_t offset, size_t len);
+
+ /** Concatenate the supplied text onto the end of the CharBuf. Don't
+ * check for UTF-8 validity.
+ */
+ void
+ Cat_Trusted_Str(CharBuf *self, const char *ptr, size_t size);
+}
+
+class Lucy::Object::ViewCharBuf cnick ViewCB
+ extends Lucy::Object::CharBuf {
+
+ inert incremented ViewCharBuf*
+ new_from_utf8(const char *utf8, size_t size);
+
+ inert incremented ViewCharBuf*
+ new_from_trusted_utf8(const char *utf8, size_t size);
+
+ inert ViewCharBuf*
+ init(ViewCharBuf *self, const char *utf8, size_t size);
+
+ void
+ Assign(ViewCharBuf *self, const CharBuf *other);
+
+ void
+ Assign_Str(ViewCharBuf *self, const char *utf8, size_t size);
+
+ u32_t
+ Trim_Top(ViewCharBuf *self);
+
+ size_t
+ Nip(ViewCharBuf *self, size_t count);
+
+ i32_t
+ Nip_One(ViewCharBuf *self);
+
+ /** Throws an error. */
+ void
+ Grow(ViewCharBuf *self, size_t size);
+
+ public void
+ Destroy(ViewCharBuf *self);
+}
+
+class Lucy::Object::ZombieCharBuf cnick ZCB
+ extends Lucy::Object::ViewCharBuf {
+
+ /** Throws an error.
+ */
+ public void
+ Destroy(ZombieCharBuf *self);
+}
+
+__C__
+
+#define LUCY_ZCB_BLANK \
+ { LUCY_ZOMBIECHARBUF, {1}, "", 0, 0 }
+#define LUCY_ZCB_LITERAL(string) \
+ { LUCY_ZOMBIECHARBUF, {1}, string "", sizeof(string) -1, sizeof(string) }
+
+extern lucy_ZombieCharBuf LUCY_ZCB_EMPTY;
+
+/* Simple functions which allow the assignment idiom for ZombieCharBuf values.
+ */
+lucy_ZombieCharBuf
+lucy_ZCB_make_str(const char *ptr, size_t size);
+
+lucy_ZombieCharBuf
+lucy_ZCB_make(const lucy_CharBuf *source);
+
+#ifdef LUCY_USE_SHORT_NAMES
+ #define ZCB_BLANK LUCY_ZCB_BLANK
+ #define ZCB_LITERAL(_string) LUCY_ZCB_LITERAL(_string)
+ #define EMPTY LUCY_ZCB_EMPTY
+ #define ZCB_make lucy_ZCB_make
+ #define ZCB_make_str lucy_ZCB_make_str
+#endif
+__END_C__
+
+/* Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
Propchange: lucene/lucy/trunk/core/Lucy/Object/CharBuf.bp
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/lucy/trunk/core/Lucy/Object/CharBuf.c
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Object/CharBuf.c?rev=814956&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Object/CharBuf.c (added)
+++ lucene/lucy/trunk/core/Lucy/Object/CharBuf.c Tue Sep 15 01:29:17 2009
@@ -0,0 +1,906 @@
+#define C_LUCY_CHARBUF
+#define C_LUCY_VIEWCHARBUF
+#define C_LUCY_ZOMBIECHARBUF
+#define LUCY_USE_SHORT_NAMES
+#define CHY_USE_SHORT_NAMES
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "Lucy/Object/VTable.h"
+#include "Lucy/Object/CharBuf.h"
+
+#include "Lucy/Object/Err.h"
+#include "Lucy/Util/Memory.h"
+#include "Lucy/Util/StringHelper.h"
+
+/* The end of the string (address of the terminating NUL byte). */
+#define CBEND(self) ((self)->ptr + (self)->size)
+
+/* Reallocate if necessary. */
+static INLINE void
+SI_maybe_grow(CharBuf *self, size_t new_size);
+
+/* Maximum number of characters in a stringified 64-bit integer, including
+ * minus sign if negative.
+ */
+#define MAX_I64_CHARS 20
+
+/* Helper function for throwing invalid UTF-8 error. Since THROW uses
+ * a CharBuf internally, calling THROW with invalid UTF-8 would create an
+ * infinite loop -- so we fwrite some of the bogus text to stderr and then
+ * invoke THROW with a generic message. */
+static void
+S_die_invalid_utf8(const char *text, size_t size);
+
+/* Helper function for throwing invalid pattern error. */
+static void
+S_die_invalid_pattern(const char *pattern);
+
+ZombieCharBuf EMPTY = ZCB_BLANK;
+
+CharBuf*
+CB_new(size_t size)
+{
+ CharBuf *self = (CharBuf*)VTable_Make_Obj(CHARBUF);
+ return CB_init(self, size);
+}
+
+CharBuf*
+CB_init(CharBuf *self, size_t size)
+{
+ /* Derive. */
+ self->ptr = (char*)MALLOCATE(size + 1);
+
+ /* Init. */
+ *self->ptr = '\0'; /* Empty string. */
+
+ /* Assign. */
+ self->size = 0;
+ self->cap = size + 1;
+
+ return self;
+}
+
+CharBuf*
+CB_new_from_utf8(const char *ptr, size_t size)
+{
+ if (!StrHelp_utf8_valid(ptr, size))
+ S_die_invalid_utf8(ptr, size);
+ return CB_new_from_trusted_utf8(ptr, size);
+}
+
+CharBuf*
+CB_new_from_trusted_utf8(const char *ptr, size_t size)
+{
+ CharBuf *self = (CharBuf*)VTable_Make_Obj(CHARBUF);
+
+ /* Derive. */
+ self->ptr = (char*)MALLOCATE(size + 1);
+
+ /* Copy. */
+ memcpy(self->ptr, ptr, size);
+
+ /* Assign. */
+ self->size = size;
+ self->cap = size + 1;
+ self->ptr[size] = '\0'; /* Null terminate. */
+
+ return self;
+}
+
+CharBuf*
+CB_new_steal_from_trusted_str(char *ptr, size_t size, size_t cap)
+{
+ CharBuf *self = (CharBuf*)VTable_Make_Obj(CHARBUF);
+ self->ptr = ptr;
+ self->size = size;
+ self->cap = cap;
+ return self;
+}
+
+/* Wrap a caller-supplied buffer in a new CharBuf after checking that its
+ * first `size` bytes are valid UTF-8.
+ *
+ * @param ptr  Buffer to wrap; stored as-is (not copied) on success.
+ * @param size Number of content bytes in `ptr`.
+ * @param cap  Allocated capacity of `ptr`, passed through unchanged.
+ * @return a new CharBuf built by CB_new_steal_from_trusted_str().
+ *
+ * Invalid input is reported via S_die_invalid_utf8() before any CharBuf is
+ * created, so `ptr` is not stored (or freed) on that path.
+ */
+CharBuf*
+CB_new_steal_str(char *ptr, size_t size, size_t cap)
+{
+    /* The validity check's result used to be discarded, which let invalid
+     * UTF-8 through despite the documented contract. */
+    if (!StrHelp_utf8_valid(ptr, size))
+        S_die_invalid_utf8(ptr, size);
+    return CB_new_steal_from_trusted_str(ptr, size, cap);
+}
+
+CharBuf*
+CB_newf(const char *pattern, ...) {
+ CharBuf *self = CB_new(strlen(pattern));
+ va_list args;
+ va_start(args, pattern);
+ CB_VCatF(self, pattern, args);
+ va_end(args);
+ return self;
+}
+
+void
+CB_destroy(CharBuf *self)
+{
+ FREEMEM(self->ptr);
+ SUPER_DESTROY(self, CHARBUF);
+}
+
+i32_t
+CB_hash_code(CharBuf *self)
+{
+ u32_t hashvalue = 5381;
+ ZombieCharBuf iterator = ZCB_make(self);
+
+ {
+ const CB_nip_one_t nip_one
+ = (CB_nip_one_t)METHOD(iterator.vtable, CB, Nip_One);
+ while (iterator.size) {
+ u32_t code_point = (u32_t)nip_one((CharBuf*)&iterator);
+ hashvalue = ((hashvalue << 5) + hashvalue) ^ code_point;
+ }
+ }
+
+ return (i32_t) hashvalue;
+}
+
+static INLINE void
+SI_maybe_grow(CharBuf *self, size_t new_size)
+{
+ /* Bail out if the buffer's already at least as big as required. */
+ if (self->cap > new_size)
+ return;
+
+ self->ptr = (char*)REALLOCATE(self->ptr, new_size + 1);
+ self->cap = new_size + 1;
+}
+
+char*
+CB_grow(CharBuf *self, size_t new_size)
+{
+ SI_maybe_grow(self, new_size);
+ return self->ptr;
+}
+
+static void
+S_die_invalid_utf8(const char *text, size_t size)
+{
+ fprintf(stderr, "Invalid UTF-8, aborting: '");
+ fwrite(text, sizeof(char), size < 200 ? size : 200, stderr);
+ if (size > 200) fwrite("[...]", sizeof(char), 5, stderr);
+ fprintf(stderr, "'\n");
+ THROW(ERR, "Invalid UTF-8.");
+}
+
+static void
+S_die_invalid_pattern(const char *pattern)
+{
+ size_t pattern_len = strlen(pattern);
+ fprintf(stderr, "Invalid pattern, aborting: '");
+ fwrite(pattern, sizeof(char), pattern_len, stderr);
+ fprintf(stderr, "'\n");
+ THROW(ERR, "Invalid pattern.");
+}
+
+void
+CB_setf(CharBuf *self, const char *pattern, ...)
+{
+ va_list args;
+ CB_Set_Size(self, 0);
+ va_start(args, pattern);
+ CB_VCatF(self, pattern, args);
+ va_end(args);
+}
+
+void
+CB_catf(CharBuf *self, const char *pattern, ...)
+{
+ va_list args;
+ va_start(args, pattern);
+ CB_VCatF(self, pattern, args);
+ va_end(args);
+}
+
+/* Append formatted text to `self`.
+ *
+ * Literal runs of `pattern` are appended verbatim.  Each '%' introduces one
+ * of the directives handled below:
+ *
+ *     %%                 a literal percent sign
+ *     %o                 Obj* (CharBufs are appended directly, other objects
+ *                        via Obj_To_String; a NULL pointer prints "[NULL]")
+ *     %s                 char* (NULL prints "[NULL]", invalid UTF-8 prints
+ *                        "[INVALID UTF8]")
+ *     %i8  %i32  %i64    signed integers
+ *     %u8  %u32  %u64    unsigned integers
+ *     %f64               double, rendered with "%g"
+ *     %x32               32-bit value as 8 zero-padded hex digits
+ *
+ * Any other directive is reported through S_die_invalid_pattern().
+ *
+ * @param self    The CharBuf to append to.
+ * @param pattern NUL-terminated format pattern.
+ * @param args    Arguments consumed by the directives, in order.
+ */
+void
+CB_vcatf(CharBuf *self, const char *pattern, va_list args)
+{
+    size_t      pattern_len   = strlen(pattern);
+    const char *pattern_start = pattern;
+    const char *pattern_end   = pattern + pattern_len;
+    char        buf[64];
+
+    for ( ; pattern < pattern_end; pattern++) {
+        const char *slice_end = pattern;
+
+        /* Consume all characters leading up to a '%'. */
+        while (slice_end < pattern_end && *slice_end != '%') { slice_end++; }
+        if (pattern != slice_end) {
+            size_t size = slice_end - pattern;
+            CB_Cat_Trusted_Str(self, pattern, size);
+            pattern = slice_end;
+        }
+
+        if (pattern < pattern_end) {
+            pattern++; /* Move past '%'. */
+
+            switch (*pattern) {
+                case '%': {
+                    CB_Cat_Trusted_Str(self, "%", 1);
+                }
+                break;
+
+                case 'o': {
+                    Obj *obj = va_arg(args, Obj*);
+                    if (!obj) {
+                        CB_Cat_Trusted_Str(self, "[NULL]", 6);
+                    }
+                    else if (Obj_Is_A(obj, CHARBUF)) {
+                        CB_Cat(self, (CharBuf*)obj);
+                    }
+                    else {
+                        CharBuf *string = Obj_To_String(obj);
+                        CB_Cat(self, string);
+                        DECREF(string);
+                    }
+                }
+                break;
+
+                case 'i': {
+                    /* Signed accumulator, so the argument type matches the
+                     * signed I64P conversion passed to sprintf. */
+                    i64_t val = 0;
+                    size_t size;
+
+                    if (pattern[1] == '8') {
+                        /* 8-bit arguments arrive promoted to int. */
+                        val = va_arg(args, i32_t);
+                        pattern++;
+                    }
+                    else if (pattern[1] == '3' && pattern[2] == '2') {
+                        val = va_arg(args, i32_t);
+                        pattern += 2;
+                    }
+                    else if (pattern[1] == '6' && pattern[2] == '4') {
+                        val = va_arg(args, i64_t);
+                        pattern += 2;
+                    }
+                    else {
+                        S_die_invalid_pattern(pattern_start);
+                    }
+                    size = sprintf(buf, "%" I64P, val);
+                    CB_Cat_Trusted_Str(self, buf, size);
+                }
+                break;
+
+                case 'u': {
+                    u64_t val = 0;
+                    size_t size;
+
+                    if (pattern[1] == '8') {
+                        /* 8-bit arguments arrive promoted to int. */
+                        val = va_arg(args, u32_t);
+                        pattern += 1;
+                    }
+                    else if (pattern[1] == '3' && pattern[2] == '2') {
+                        val = va_arg(args, u32_t);
+                        pattern += 2;
+                    }
+                    else if (pattern[1] == '6' && pattern[2] == '4') {
+                        val = va_arg(args, u64_t);
+                        pattern += 2;
+                    }
+                    else {
+                        S_die_invalid_pattern(pattern_start);
+                    }
+                    size = sprintf(buf, "%" U64P, val);
+                    CB_Cat_Trusted_Str(self, buf, size);
+                }
+                break;
+
+                case 'f': {
+                    if (pattern[1] == '6' && pattern[2] == '4') {
+                        double num  = va_arg(args, double);
+                        size_t size = sprintf(buf, "%g", num);
+                        CB_Cat_Trusted_Str(self, buf, size);
+                        pattern += 2;
+                    }
+                    else {
+                        S_die_invalid_pattern(pattern_start);
+                    }
+                }
+                break;
+
+                case 'x': {
+                    if (pattern[1] == '3' && pattern[2] == '2') {
+                        unsigned long val = va_arg(args, u32_t);
+                        size_t size = sprintf(buf, "%.8lx", val);
+                        CB_Cat_Trusted_Str(self, buf, size);
+                        pattern += 2;
+                    }
+                    else {
+                        S_die_invalid_pattern(pattern_start);
+                    }
+                }
+                break;
+
+                case 's': {
+                    char *string = va_arg(args, char*);
+                    if (string == NULL) {
+                        CB_Cat_Trusted_Str(self, "[NULL]", 6);
+                    }
+                    else {
+                        size_t size = strlen(string);
+                        if (StrHelp_utf8_valid(string, size)) {
+                            CB_Cat_Trusted_Str(self, string, size);
+                        }
+                        else {
+                            CB_Cat_Trusted_Str(self, "[INVALID UTF8]", 14);
+                        }
+                    }
+                }
+                break;
+
+                /* Assume NUL-terminated pattern string, which eliminates the
+                 * need for bounds checking if '%' is the last visible
+                 * character: the terminator itself lands here. */
+                default: {
+                    S_die_invalid_pattern(pattern_start);
+                }
+            }
+        }
+    }
+}
+
+CharBuf*
+CB_to_string(CharBuf *self)
+{
+ return CB_new_from_trusted_utf8(self->ptr, self->size);
+}
+
+void
+CB_cat_char(CharBuf *self, u32_t code_point)
+{
+ const size_t MIN_SAFE_ROOM = 4 + 1;
+ if (self->size + MIN_SAFE_ROOM > self->cap) {
+ SI_maybe_grow(self, self->size + 10);
+ }
+ self->size += StrHelp_encode_utf8_char(code_point, (u8_t*)CBEND(self));
+ *CBEND(self) = '\0';
+}
+
+i32_t
+CB_swap_chars(CharBuf *self, u32_t match, u32_t replacement)
+{
+ i32_t num_swapped = 0;
+
+ if (match > 127) {
+ THROW(ERR, "match point too high: %u32", match);
+ }
+ else if (replacement > 127) {
+ THROW(ERR, "replacement code point too high: %u32", replacement);
+ }
+ else {
+ char *ptr = self->ptr;
+ char *const limit = CBEND(self);
+ for ( ; ptr < limit; ptr++) {
+ if (*ptr == (char)match) {
+ *ptr = (char)replacement;
+ num_swapped++;
+ }
+ }
+ }
+
+ return num_swapped;
+}
+
+i64_t
+CB_to_i64(CharBuf *self)
+{
+ return CB_BaseX_To_I64(self, 10);
+}
+
+/* Parse the leading portion of the string as an integer in the given base.
+ *
+ * An optional leading '-' negates the result.  Digits are '0'-'9' followed
+ * by letters (case-insensitive) for values 10 and up.  Parsing stops at the
+ * first character that is not a valid digit for `base`; whatever has been
+ * accumulated up to that point is returned.
+ *
+ * @param self The CharBuf to parse; it is not modified (a ZombieCharBuf
+ *             made from it is consumed instead).
+ * @param base The numeric base.
+ * @return the parsed value, or 0 if no digits were found.
+ */
+i64_t
+CB_basex_to_i64(CharBuf *self, u32_t base)
+{
+    ZombieCharBuf iterator = ZCB_make(self);
+    i64_t retval = 0;
+    bool_t is_negative = false;
+
+    /* Advance past minus sign. */
+    if (ZCB_Code_Point_At(&iterator, 0) == '-') {
+        ZCB_Nip_One(&iterator);
+        is_negative = true;
+    }
+
+    /* Accumulate. */
+    while (iterator.size) {
+        i32_t code_point = ZCB_Nip_One(&iterator);
+        i32_t addend;
+
+        /* The <ctype.h> classifiers are only defined for values that fit in
+         * an unsigned char (or EOF), so screen out non-ASCII code points
+         * before calling them. */
+        if (code_point < 0 || code_point > 127) break;
+        if (!isalnum(code_point)) break;
+
+        addend = isdigit(code_point)
+                 ? code_point - '0'
+                 : tolower(code_point) - 'a' + 10;
+
+        /* Valid digits run from 0 through base - 1. */
+        if (addend >= (i32_t)base) break;
+
+        retval *= base;
+        retval += addend;
+    }
+
+    /* Apply minus sign. */
+    if (is_negative) retval = 0 - retval;
+
+    return retval;
+}
+
+/* Convert the string content to a double using strtod().
+ *
+ * strtod() needs a NUL-terminated string, but `self->ptr` is not guaranteed
+ * to be terminated at `self->size`: ViewCB_init() points at caller-owned
+ * memory, and CB_chop(), CB_truncate() and CB_set_size() change `size`
+ * without writing a terminator.  Parsing in place could therefore consume
+ * bytes that are no longer part of the string, or read past the buffer.
+ * Parse a terminated copy of exactly `size` bytes instead.
+ *
+ * @return the value reported by strtod() for the string's content.
+ */
+double
+CB_to_f64(CharBuf *self)
+{
+    char   *terminated = (char*)MALLOCATE(self->size + 1);
+    double  value;
+
+    memcpy(terminated, self->ptr, self->size);
+    terminated[self->size] = '\0';
+    value = strtod(terminated, NULL);
+    FREEMEM(terminated);
+
+    return value;
+}
+
+CharBuf*
+CB_to_cb8(CharBuf *self)
+{
+ return CB_new_from_trusted_utf8(self->ptr, self->size);
+}
+
+CharBuf*
+CB_clone(CharBuf *self)
+{
+ return CB_new_from_trusted_utf8(self->ptr, self->size);
+}
+
+CharBuf*
+CB_load(CharBuf *self, Obj *dump)
+{
+ CharBuf *source = (CharBuf*)ASSERT_IS_A(dump, CHARBUF);
+ UNUSED_VAR(self);
+ return CB_Clone(source);
+}
+
+void
+CB_mimic_str(CharBuf *self, const char* ptr, size_t size)
+{
+ if (!StrHelp_utf8_valid(ptr, size))
+ S_die_invalid_utf8(ptr, size);
+ SI_maybe_grow(self, size);
+ memmove(self->ptr, ptr, size);
+ self->size = size;
+ self->ptr[size] = '\0';
+}
+
+void
+CB_mimic(CharBuf *self, Obj *other)
+{
+ CharBuf *evil_twin = (CharBuf*)ASSERT_IS_A(other, CHARBUF);
+ SI_maybe_grow(self, evil_twin->size);
+ memmove(self->ptr, evil_twin->ptr, evil_twin->size);
+ self->size = evil_twin->size;
+ self->ptr[evil_twin->size] = '\0';
+}
+
+void
+CB_cat_str(CharBuf *self, const char* ptr, size_t size)
+{
+ if (!StrHelp_utf8_valid(ptr, size))
+ S_die_invalid_utf8(ptr, size);
+ CB_cat_trusted_str(self, ptr, size);
+}
+
+void
+CB_cat_trusted_str(CharBuf *self, const char* ptr, size_t size)
+{
+ const size_t new_size = self->size + size;
+ SI_maybe_grow(self, new_size);
+ memcpy((self->ptr + self->size), ptr, size);
+ self->size = new_size;
+ self->ptr[new_size] = '\0';
+}
+
+void
+CB_cat(CharBuf *self, const CharBuf *other)
+{
+ const size_t new_size = self->size + other->size;
+ SI_maybe_grow(self, new_size);
+ memcpy((self->ptr + self->size), other->ptr, other->size);
+ self->size = new_size;
+ self->ptr[new_size] = '\0';
+}
+
+bool_t
+CB_starts_with(CharBuf *self, const CharBuf *prefix)
+{
+ return CB_starts_with_str(self, prefix->ptr, prefix->size);
+}
+
+bool_t
+CB_starts_with_str(CharBuf *self, const char *prefix, size_t size)
+{
+ if ( size <= self->size
+ && (memcmp(self->ptr, prefix, size) == 0)
+ ) {
+ return true;
+ }
+ else {
+ return false;
+ }
+}
+
+bool_t
+CB_equals(CharBuf *self, Obj *other)
+{
+ CharBuf *const evil_twin = (CharBuf*)other;
+ if (evil_twin == self) return true;
+ if (!Obj_Is_A(evil_twin, CHARBUF)) return false;
+ return CB_equals_str(self, evil_twin->ptr, evil_twin->size);
+}
+
+i32_t
+CB_compare_to(CharBuf *self, Obj *other)
+{
+ return CB_compare(&self, &other);
+}
+
+bool_t
+CB_equals_str(CharBuf *self, const char *ptr, size_t size)
+{
+ if (self->size != size)
+ return false;
+ return (memcmp(self->ptr, ptr, self->size) == 0);
+}
+
+bool_t
+CB_ends_with(CharBuf *self, const CharBuf *postfix)
+{
+ return CB_ends_with_str(self, postfix->ptr, postfix->size);
+}
+
+bool_t
+CB_ends_with_str(CharBuf *self, const char *postfix, size_t postfix_len)
+{
+ if (postfix_len <= self->size) {
+ char *start = CBEND(self) - postfix_len;
+ if (memcmp(start, postfix, postfix_len) == 0)
+ return true;
+ }
+
+ return false;
+}
+
+u32_t
+CB_trim(CharBuf *self)
+{
+ return CB_Trim_Top(self) + CB_Trim_Tail(self);
+}
+
+u32_t
+CB_trim_top(CharBuf *self)
+{
+ char *ptr = self->ptr;
+ char *end = CBEND(self);
+ u32_t count = 0;
+
+ while (ptr < end) {
+ u32_t code_point = StrHelp_decode_utf8_char(ptr);
+ if (!StrHelp_is_whitespace(code_point)) break;
+ ptr += StrHelp_UTF8_SKIP[*(u8_t*)ptr];
+ count++;
+ }
+
+ if (count) {
+ /* Copy string backwards. */
+ self->size = CBEND(self) - ptr;
+ memmove(self->ptr, ptr, self->size);
+ }
+
+ return count;
+}
+
+u32_t
+CB_trim_tail(CharBuf *self)
+{
+ u32_t count = 0;
+ const char *ptr = CBEND(self);
+ char *const top = self->ptr;
+
+ while (NULL != (ptr = StrHelp_back_utf8_char(ptr, top))) {
+ u32_t code_point = StrHelp_decode_utf8_char(ptr);
+ if (!StrHelp_is_whitespace(code_point)) break;
+ self->size -= (CBEND(self) - ptr);
+ count++;
+ }
+
+ return count;
+}
+
+size_t
+CB_nip(CharBuf *self, size_t count)
+{
+ size_t num_nipped = 0;
+ char *const end = CBEND(self);
+ char *ptr = self->ptr;
+ for ( ; ptr < end && count--; ptr += StrHelp_UTF8_SKIP[*(u8_t*)ptr]) {
+ num_nipped++;
+ }
+ self->size = end - ptr;
+ memmove(self->ptr, ptr, self->size);
+ return num_nipped;
+}
+
+i32_t
+CB_nip_one(CharBuf *self)
+{
+ if (self->size == 0) {
+ return 0;
+ }
+ else {
+ i32_t retval = (i32_t)StrHelp_decode_utf8_char(self->ptr);
+ size_t consumed = StrHelp_UTF8_SKIP[*(u8_t*)self->ptr];
+ char *ptr = self->ptr + StrHelp_UTF8_SKIP[*(u8_t*)self->ptr];
+ self->size -= consumed;
+ memmove(self->ptr, ptr, self->size);
+ return retval;
+ }
+}
+
+size_t
+CB_chop(CharBuf *self, size_t count)
+{
+ size_t num_chopped = 0;
+ const char *ptr = CBEND(self);
+ char *top = self->ptr;
+ for (num_chopped = 0; num_chopped < count; num_chopped++) {
+ if (NULL == (ptr = StrHelp_back_utf8_char(ptr, top))) break;
+ self->size -= CBEND(self) - ptr;
+ }
+ return num_chopped;
+}
+
+size_t
+CB_length(CharBuf *self)
+{
+ size_t len = 0;
+ char *ptr = self->ptr;
+ char *end = CBEND(self);
+ while (ptr < end) {
+ ptr += StrHelp_UTF8_SKIP[*(u8_t*)ptr];
+ len++;
+ }
+ return len;
+}
+
+/* Shorten the string so that it holds at most `count` code points.
+ *
+ * A ZombieCharBuf over the same bytes is advanced by up to `count` code
+ * points; whatever it still has left is the tail to discard.
+ *
+ * @param count Maximum new length, in Unicode code points.
+ * @return the number of code points remaining in the string.
+ */
+size_t
+CB_truncate(CharBuf *self, size_t count)
+{
+    ZombieCharBuf iterator = ZCB_make(self);
+    /* Keep the count as size_t; a u32_t local would silently narrow it. */
+    const size_t num_code_points = ZCB_Nip(&iterator, count);
+    self->size -= iterator.size;
+    return num_code_points;
+}
+
+u32_t
+CB_code_point_at(CharBuf *self, size_t tick)
+{
+ size_t count = 0;
+ char *ptr = self->ptr;
+ char *const end = CBEND(self);
+
+ for ( ; ptr < end; ptr += StrHelp_UTF8_SKIP[*(u8_t*)ptr]) {
+ if (count == tick) return StrHelp_decode_utf8_char(ptr);
+ count++;
+ }
+
+ return 0;
+}
+
+/* Return the code point located `tick` code points back from the end of
+ * the string, so a `tick` of 1 yields the last character.
+ *
+ * @param tick Distance from the end, in Unicode code points.
+ * @return the code point, or 0 if `tick` is 0 or exceeds the string length.
+ */
+u32_t
+CB_code_point_from(CharBuf *self, size_t tick)
+{
+    size_t      count;
+    const char *ptr = CBEND(self);
+    char       *top = self->ptr;
+
+    /* Zero steps back is the end-of-string address, which is outside the
+     * string; for buffers that borrow memory there may be no terminator
+     * there to decode. */
+    if (tick == 0) return 0;
+
+    for (count = 0; count < tick; count++) {
+        if (NULL == (ptr = StrHelp_back_utf8_char(ptr, top))) return 0;
+    }
+    return StrHelp_decode_utf8_char(ptr);
+}
+
+CharBuf*
+CB_substring(CharBuf *self, size_t offset, size_t len)
+{
+ ZombieCharBuf iterator = ZCB_make(self);
+ char *sub_start;
+ size_t byte_len;
+
+ ZCB_Nip(&iterator, offset);
+ sub_start = iterator.ptr;
+ ZCB_Nip(&iterator, len);
+ byte_len = iterator.ptr - sub_start;
+
+ return CB_new_from_trusted_utf8(sub_start, byte_len);
+}
+
+int
+CB_compare(const void *va, const void *vb)
+{
+ const CharBuf *a = *(const CharBuf**)va;
+ const CharBuf *b = *(const CharBuf**)vb;
+ ZombieCharBuf iterator_a = ZCB_make(a);
+ ZombieCharBuf iterator_b = ZCB_make(b);
+ while (iterator_a.size && iterator_b.size) {
+ i32_t code_point_a = ZCB_Nip_One(&iterator_a);
+ i32_t code_point_b = ZCB_Nip_One(&iterator_b);
+ const i32_t comparison = code_point_a - code_point_b;
+ if (comparison != 0) return comparison;
+ }
+ if (iterator_a.size != iterator_b.size) {
+ return iterator_a.size < iterator_b.size ? -1 : 1;
+ }
+ return 0;
+}
+
+bool_t
+CB_less_than(const void *va, const void *vb)
+{
+ return CB_compare(va, vb) < 0 ? 1 : 0;
+}
+
+void
+CB_set_size(CharBuf *self, size_t size) { self->size = size; }
+size_t
+CB_get_size(CharBuf *self) { return self->size; }
+u8_t*
+CB_get_ptr8(CharBuf *self) { return (u8_t*)self->ptr; }
+
+/*****************************************************************/
+
+ViewCharBuf*
+ViewCB_new_from_utf8(const char *utf8, size_t size)
+{
+ if (!StrHelp_utf8_valid(utf8, size))
+ S_die_invalid_utf8(utf8, size);
+ return ViewCB_new_from_trusted_utf8(utf8, size);
+}
+
+ViewCharBuf*
+ViewCB_new_from_trusted_utf8(const char *utf8, size_t size)
+{
+ ViewCharBuf *self = (ViewCharBuf*)VTable_Make_Obj(VIEWCHARBUF);
+ return ViewCB_init(self, utf8, size);
+}
+
+ViewCharBuf*
+ViewCB_init(ViewCharBuf *self, const char *utf8, size_t size)
+{
+ self->ptr = (char*)utf8;
+ self->size = size;
+ self->cap = 0;
+ return self;
+}
+
+void
+ViewCB_destroy(ViewCharBuf *self)
+{
+ /* Note that we do not free self->ptr, and that we invoke the
+ * SUPER_DESTROY with CHARBUF instead of VIEWCHARBUF. */
+ SUPER_DESTROY(self, CHARBUF);
+}
+
+void
+ViewCB_assign(ViewCharBuf *self, const CharBuf *other)
+{
+ self->ptr = other->ptr;
+ self->size = other->size;
+}
+
+void
+ViewCB_assign_str(ViewCharBuf *self, const char *utf8, size_t size)
+{
+ if (!StrHelp_utf8_valid(utf8, size))
+ S_die_invalid_utf8(utf8, size);
+ self->ptr = (char*)utf8;
+ self->size = size;
+}
+
+u32_t
+ViewCB_trim_top(ViewCharBuf *self)
+{
+ u32_t count = 0;
+ char *ptr = self->ptr;
+ char *end = CBEND(self);
+
+ while (ptr < end) {
+ u32_t code_point = StrHelp_decode_utf8_char(ptr);
+ if (!StrHelp_is_whitespace(code_point)) break;
+ ptr += StrHelp_UTF8_SKIP[*(u8_t*)ptr];
+ count++;
+ }
+
+ if (count) {
+ self->size = end - ptr;
+ self->ptr = ptr;
+ }
+
+ return count;
+}
+
+size_t
+ViewCB_nip(ViewCharBuf *self, size_t count)
+{
+ size_t num_nipped;
+ char *ptr = self->ptr;
+ char *end = CBEND(self);
+ for (num_nipped = 0;
+ ptr < end && count--;
+ ptr += StrHelp_UTF8_SKIP[*(u8_t*)ptr]
+ ) {
+ num_nipped++;
+ }
+ self->size = end - ptr;
+ self->ptr = ptr;
+ return num_nipped;
+}
+
+i32_t
+ViewCB_nip_one(ViewCharBuf *self)
+{
+ if (self->size == 0) {
+ return 0;
+ }
+ else {
+ i32_t retval = (i32_t)StrHelp_decode_utf8_char(self->ptr);
+ size_t consumed = StrHelp_UTF8_SKIP[*(u8_t*)self->ptr];
+ self->ptr += consumed;
+ self->size -= consumed;
+ return retval;
+ }
+}
+
+void
+ViewCB_grow(ViewCharBuf *self, size_t size)
+{
+ UNUSED_VAR(self);
+ UNUSED_VAR(size);
+ THROW(ERR, "Can't grow a ViewCharBuf ('%o')", self);
+}
+
+/*****************************************************************/
+
+ZombieCharBuf
+ZCB_make_str(const char *ptr, size_t size)
+{
+ ZombieCharBuf retval;
+ retval.ref.count = 1;
+ retval.vtable = ZOMBIECHARBUF;
+ retval.cap = 0;
+ retval.size = size;
+ retval.ptr = (char*)ptr;
+ return retval;
+}
+
+ZombieCharBuf
+ZCB_make(const CharBuf *source)
+{
+ return ZCB_make_str(source->ptr, source->size);
+}
+
+void
+ZCB_destroy(ZombieCharBuf *self)
+{
+ THROW(ERR, "Can't destroy a ZombieCharBuf ('%o')", self);
+}
+
+/* Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
Propchange: lucene/lucy/trunk/core/Lucy/Object/CharBuf.c
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.bp
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.bp?rev=814956&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.bp (added)
+++ lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.bp Tue Sep 15 01:29:17 2009
@@ -0,0 +1,25 @@
+parcel Lucy;
+
+/** Test batch for CharBuf.
+ */
+class Lucy::Test::Object::TestCharBuf cnick TestCB
+    extends Lucy::Object::Obj {
+
+    /** Run the "TestCharBuf" test batch.
+     */
+    inert void
+    run_tests();
+}
+
+/* Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
Propchange: lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.bp
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.c
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.c?rev=814956&view=auto
==============================================================================
--- lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.c (added)
+++ lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.c Tue Sep 15 01:29:17 2009
@@ -0,0 +1,398 @@
+#define C_LUCY_TESTCHARBUF
+#include "Lucy/Util/ToolSet.h"
+#include <stdarg.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "Lucy/Test.h"
+#include "Lucy/Test/Object/TestCharBuf.h"
+
+/* UTF-8 encoding of U+263A, NUL-terminated so it can be fed to "%s". */
+static char smiley[] = { (char)0xE2, (char)0x98, (char)0xBA, 0 };
+/* Length of `smiley` in bytes, not counting the terminating NUL. */
+static u32_t smiley_len = 3;
+
+/* Return a new CharBuf holding a copy of the NUL-terminated string `string`.
+ * The caller owns the returned object and must DECREF it.
+ *
+ * The parameter is const-qualified because the string is only read and every
+ * caller passes a string literal.
+ */
+static CharBuf*
+S_get_cb(const char *string)
+{
+    return CB_new_from_utf8(string, strlen(string));
+}
+
+/* Exercise Cat, Cat_Char, Cat_Str and Cat_Trusted_Str: each one must turn
+ * its starting CharBuf into "a" followed by a smiley.
+ */
+static void
+test_Cat(TestBatch *batch)
+{
+    CharBuf *expected = CB_newf("a%s", smiley);
+    CharBuf *actual;
+
+    /* Cat: append the whole expected CharBuf to an empty one. */
+    actual = S_get_cb("");
+    CB_Cat(actual, expected);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "Cat");
+    DECREF(actual);
+
+    /* Cat_Char: append the smiley as a code point. */
+    actual = S_get_cb("a");
+    CB_Cat_Char(actual, 0x263A);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "Cat_Char");
+    DECREF(actual);
+
+    /* Cat_Str: append the smiley as raw UTF-8 bytes. */
+    actual = S_get_cb("a");
+    CB_Cat_Str(actual, smiley, smiley_len);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "Cat_Str");
+    DECREF(actual);
+
+    /* Cat_Trusted_Str: same bytes through the "trusted" entry point. */
+    actual = S_get_cb("a");
+    CB_Cat_Trusted_Str(actual, smiley, smiley_len);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "Cat_Trusted_Str");
+    DECREF(actual);
+
+    DECREF(expected);
+}
+
+/* Exercise Mimic, Mimic_Str and Clone: each result must equal "foo".
+ */
+static void
+test_Mimic_and_Clone(TestBatch *batch)
+{
+    CharBuf *expected = S_get_cb("foo");
+    CharBuf *actual;
+
+    /* Mimic: overwrite "bar" with the contents of another CharBuf. */
+    actual = S_get_cb("bar");
+    CB_Mimic(actual, (Obj*)expected);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "Mimic");
+    DECREF(actual);
+
+    /* Mimic_Str: overwrite "bar" with a raw string. */
+    actual = S_get_cb("bar");
+    CB_Mimic_Str(actual, "foo", 3);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "Mimic_Str");
+    DECREF(actual);
+
+    /* Clone: the copy must compare equal to its source. */
+    actual = CB_Clone(expected);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "Clone");
+    DECREF(actual);
+
+    DECREF(expected);
+}
+
+/* Check code point lookups against a string which mixes one-byte characters
+ * with three-byte smileys.  Each loop iteration makes two assertions: one at
+ * index `i` walking forwards, one at index `from` walking backwards.
+ */
+static void
+test_Code_Point_At_and_From(TestBatch *batch)
+{
+ u32_t code_points[] = { 'a', 0x263A, 0x263A, 'b', 0x263A, 'c' };
+ u32_t num_code_points = sizeof(code_points) / sizeof(u32_t);
+ CharBuf *string = CB_newf("a%s%sb%sc", smiley, smiley, smiley);
+ u32_t i;
+
+ for (i = 0; i < num_code_points; i++) {
+ /* Mirror image of `i`: runs from the last index down to 0. */
+ u32_t from = num_code_points - i - 1;
+ ASSERT_INT_EQ(batch, CB_Code_Point_At(string, i), code_points[i],
+ "Code_Point_At %ld", (long)i);
+ /* NOTE(review): this assertion is labelled "Code_Point_From" but calls
+ * CB_Code_Point_At, so Code_Point_From is never exercised by this test.
+ * Presumably CB_Code_Point_From was intended -- confirm its tick
+ * semantics (counting back from the end) before changing the call. */
+ ASSERT_INT_EQ(batch, CB_Code_Point_At(string, from),
+ code_points[from], "Code_Point_From %ld", (long)from);
+ }
+
+ DECREF(string);
+}
+
+/* SubString with offset 2 and length 3 on "a<smiley><smiley>b<smiley>c" must
+ * yield "<smiley>b<smiley>".
+ */
+static void
+test_SubString(TestBatch *batch)
+{
+    CharBuf *source   = CB_newf("a%s%sb%sc", smiley, smiley, smiley);
+    CharBuf *expected = CB_newf("%sb%s", smiley, smiley);
+    CharBuf *actual   = CB_SubString(source, 2, 3);
+
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "SubString");
+
+    DECREF(expected);
+    DECREF(actual);
+    DECREF(source);
+}
+
+/* Nip must remove code points from the front of the string and Chop from the
+ * end; both are applied to "a<smiley><smiley>b<smiley>c".
+ */
+static void
+test_Nip_and_Chop(TestBatch *batch)
+{
+    /* Nip 2: "a" and the first smiley disappear. */
+    {
+        CharBuf *expected = CB_newf("%sb%sc", smiley, smiley);
+        CharBuf *actual   = CB_newf("a%s%sb%sc", smiley, smiley, smiley);
+        CB_Nip(actual, 2);
+        ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "Nip");
+        DECREF(expected);
+        DECREF(actual);
+    }
+
+    /* Chop 3: "b", the last smiley and "c" disappear. */
+    {
+        CharBuf *expected = CB_newf("a%s%s", smiley, smiley);
+        CharBuf *actual   = CB_newf("a%s%sb%sc", smiley, smiley, smiley);
+        CB_Chop(actual, 3);
+        ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)actual), "Chop");
+        DECREF(expected);
+        DECREF(actual);
+    }
+}
+
+
+/* Truncate to 2 code points: "a<smiley><smiley>b<smiley>c" must become
+ * "a<smiley>".
+ */
+static void
+test_Truncate(TestBatch *batch)
+{
+    /* The pattern has a single %s, so exactly one smiley is supplied. */
+    CharBuf *wanted = CB_newf("a%s", smiley);
+    CharBuf *got = CB_newf("a%s%sb%sc", smiley, smiley, smiley);
+    CB_Truncate(got, 2);
+    ASSERT_TRUE(batch, CB_Equals(wanted, (Obj*)got), "Truncate");
+    DECREF(wanted);
+    DECREF(got);
+}
+
+/* Append every code point in `spaces`, then a smiley, then every code point
+ * in `spaces` once more.
+ */
+static void
+S_cat_padded_smiley(CharBuf *target, const u32_t *spaces, u32_t num_spaces)
+{
+    u32_t tick;
+
+    for (tick = 0; tick < num_spaces; tick++) {
+        CB_Cat_Char(target, spaces[tick]);
+    }
+    CB_Cat_Char(target, 0x263A);
+    for (tick = 0; tick < num_spaces; tick++) {
+        CB_Cat_Char(target, spaces[tick]);
+    }
+}
+
+/* Trim_Top, Trim_Tail and Trim must each report true when they remove
+ * something and false when there is nothing left to remove, and must leave
+ * only the smiley behind.
+ */
+static void
+test_Trim(TestBatch *batch)
+{
+    const u32_t whitespace[] = {
+        ' ',    '\t',   '\r',   '\n',   0x000B, 0x000C, 0x000D, 0x0085,
+        0x00A0, 0x1680, 0x180E, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004,
+        0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x2028, 0x2029,
+        0x202F, 0x205F, 0x3000
+    };
+    const u32_t num_whitespace = sizeof(whitespace) / sizeof(u32_t);
+    CharBuf *subject = CB_new(0);
+
+    /* Surround a smiley with lots of whitespace, then trim each end
+     * separately. */
+    S_cat_padded_smiley(subject, whitespace, num_whitespace);
+
+    ASSERT_TRUE(batch, CB_Trim_Top(subject),
+        "Trim_Top returns true on success");
+    ASSERT_FALSE(batch, CB_Trim_Top(subject),
+        "Trim_Top returns false on failure");
+    ASSERT_TRUE(batch, CB_Trim_Tail(subject),
+        "Trim_Tail returns true on success");
+    ASSERT_FALSE(batch, CB_Trim_Tail(subject),
+        "Trim_Tail returns false on failure");
+    ASSERT_TRUE(batch, CB_Equals_Str(subject, smiley, smiley_len),
+        "Trim_Top and Trim_Tail worked");
+
+    /* Build the spacey smiley again and trim both ends at once. */
+    CB_Truncate(subject, 0);
+    S_cat_padded_smiley(subject, whitespace, num_whitespace);
+
+    ASSERT_TRUE(batch, CB_Trim(subject), "Trim returns true on success");
+    ASSERT_FALSE(batch, CB_Trim(subject), "Trim returns false on failure");
+    ASSERT_TRUE(batch, CB_Equals_Str(subject, smiley, smiley_len),
+        "Trim worked");
+
+    DECREF(subject);
+}
+
+/* To_F64 must parse "1.5" and "-1.5" to within 0.001 of the expected value.
+ */
+static void
+test_To_F64(TestBatch *batch)
+{
+    CharBuf *charbuf = S_get_cb("1.5");
+    double   delta;
+
+    /* Positive value: absolute distance from 1.5. */
+    delta = 1.5 - CB_To_F64(charbuf);
+    if (delta < 0) { delta = -delta; }
+    ASSERT_TRUE(batch, delta < 0.001, "To_F64");
+
+    /* Negative value: absolute distance from -1.5. */
+    CB_setf(charbuf, "-1.5");
+    delta = CB_To_F64(charbuf) + 1.5;
+    if (delta < 0) { delta = -delta; }
+    ASSERT_TRUE(batch, delta < 0.001, "To_F64 negative");
+
+    DECREF(charbuf);
+}
+
+/* To_I64 must parse "10" as 10 and "-10" as -10.
+ */
+static void
+test_To_I64(TestBatch *batch)
+{
+    CharBuf *charbuf = S_get_cb("10");
+
+    ASSERT_TRUE(batch, 10 == CB_To_I64(charbuf), "To_I64");
+
+    CB_setf(charbuf, "-10");
+    ASSERT_TRUE(batch, -10 == CB_To_I64(charbuf), "To_I64 negative");
+
+    DECREF(charbuf);
+}
+
+
+/* catf "%s": a C string argument is spliced into the pattern.
+ */
+static void
+test_vcatf_s(TestBatch *batch)
+{
+    CharBuf *result   = S_get_cb("foo ");
+    CharBuf *expected = S_get_cb("foo bar bizzle baz");
+
+    CB_catf(result, "bar %s baz", "bizzle");
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)result), "%%s");
+
+    DECREF(expected);
+    DECREF(result);
+}
+
+/* catf "%s" with a NULL string argument must render "[NULL]".
+ */
+static void
+test_vcatf_null_string(TestBatch *batch)
+{
+    CharBuf *wanted = S_get_cb("foo bar [NULL] baz");
+    CharBuf *got = S_get_cb("foo ");
+    /* A bare NULL in a variadic call is not guaranteed to have pointer type
+     * (it may be a plain integer 0), so cast it to the type "%s" expects. */
+    CB_catf(got, "bar %s baz", (char*)NULL);
+    ASSERT_TRUE(batch, CB_Equals(wanted, (Obj*)got), "%%s NULL");
+    DECREF(wanted);
+    DECREF(got);
+}
+
+/* catf "%o": a CharBuf argument is spliced into the pattern.
+ */
+static void
+test_vcatf_cb(TestBatch *batch)
+{
+    CharBuf *result   = S_get_cb("foo ");
+    CharBuf *insert   = S_get_cb("ZEKE");
+    CharBuf *expected = S_get_cb("foo bar ZEKE baz");
+
+    CB_catf(result, "bar %o baz", insert);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)result), "%%o CharBuf");
+
+    DECREF(insert);
+    DECREF(expected);
+    DECREF(result);
+}
+
+/* catf "%o" with a NULL object argument must render "[NULL]".
+ */
+static void
+test_vcatf_null_obj(TestBatch *batch)
+{
+    CharBuf *wanted = S_get_cb("foo bar [NULL] baz");
+    CharBuf *got = S_get_cb("foo ");
+    /* A bare NULL in a variadic call is not guaranteed to have pointer type
+     * (it may be a plain integer 0), so pass an explicitly typed null
+     * object pointer. */
+    CB_catf(got, "bar %o baz", (Obj*)NULL);
+    ASSERT_TRUE(batch, CB_Equals(wanted, (Obj*)got), "%%o NULL");
+    DECREF(wanted);
+    DECREF(got);
+}
+
+/* catf "%i8": a negative 8-bit integer is rendered in decimal.
+ */
+static void
+test_vcatf_i8(TestBatch *batch)
+{
+    i8_t     value    = -3;
+    CharBuf *result   = S_get_cb("foo ");
+    CharBuf *expected = S_get_cb("foo bar -3 baz");
+
+    CB_catf(result, "bar %i8 baz", value);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)result), "%%i8");
+
+    DECREF(expected);
+    DECREF(result);
+}
+
+/* catf "%i32": a negative 32-bit integer is rendered in decimal.
+ */
+static void
+test_vcatf_i32(TestBatch *batch)
+{
+    i32_t    value    = -100000;
+    CharBuf *result   = S_get_cb("foo ");
+    CharBuf *expected = S_get_cb("foo bar -100000 baz");
+
+    CB_catf(result, "bar %i32 baz", value);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)result), "%%i32");
+
+    DECREF(expected);
+    DECREF(result);
+}
+
+/* catf "%i64": a negative integer too large for 32 bits is rendered in
+ * decimal.
+ */
+static void
+test_vcatf_i64(TestBatch *batch)
+{
+    i64_t    value    = I64_C(-5000000000);
+    CharBuf *result   = S_get_cb("foo ");
+    CharBuf *expected = S_get_cb("foo bar -5000000000 baz");
+
+    CB_catf(result, "bar %i64 baz", value);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)result), "%%i64");
+
+    DECREF(expected);
+    DECREF(result);
+}
+
+/* catf "%u8": an unsigned 8-bit integer is rendered in decimal.
+ */
+static void
+test_vcatf_u8(TestBatch *batch)
+{
+    u8_t     value    = 3;
+    CharBuf *result   = S_get_cb("foo ");
+    CharBuf *expected = S_get_cb("foo bar 3 baz");
+
+    CB_catf(result, "bar %u8 baz", value);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)result), "%%u8");
+
+    DECREF(expected);
+    DECREF(result);
+}
+
+/* catf "%u32": an unsigned 32-bit integer is rendered in decimal.
+ */
+static void
+test_vcatf_u32(TestBatch *batch)
+{
+    u32_t    value    = 100000;
+    CharBuf *result   = S_get_cb("foo ");
+    CharBuf *expected = S_get_cb("foo bar 100000 baz");
+
+    CB_catf(result, "bar %u32 baz", value);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)result), "%%u32");
+
+    DECREF(expected);
+    DECREF(result);
+}
+
+/* catf "%u64": an unsigned integer too large for 32 bits is rendered in
+ * decimal.
+ */
+static void
+test_vcatf_u64(TestBatch *batch)
+{
+    u64_t    value    = U64_C(5000000000);
+    CharBuf *result   = S_get_cb("foo ");
+    CharBuf *expected = S_get_cb("foo bar 5000000000 baz");
+
+    CB_catf(result, "bar %u64 baz", value);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)result), "%%u64");
+
+    DECREF(expected);
+    DECREF(result);
+}
+
+/* catf "%f64": the output must match what sprintf's "%g" produces for the
+ * same value.
+ */
+static void
+test_vcatf_f64(TestBatch *batch)
+{
+    char     expected_text[64];
+    float    value  = 1.3f;
+    CharBuf *result = S_get_cb("foo ");
+    CharBuf *expected;
+
+    /* Let the C library produce the reference rendering.  As a variadic
+     * argument the float reaches both formatters promoted to double. */
+    sprintf(expected_text, "foo bar %g baz", value);
+    expected = CB_new_from_trusted_utf8(expected_text, strlen(expected_text));
+
+    CB_catf(result, "bar %f64 baz", value);
+    ASSERT_TRUE(batch, CB_Equals(expected, (Obj*)result), "%%f64");
+
+    DECREF(expected);
+    DECREF(result);
+}
+
+/* catf "%x32": the output must match an 8-digit, zero-padded lowercase hex
+ * rendering of the same value produced by sprintf.
+ */
+static void
+test_vcatf_x32(TestBatch *batch)
+{
+    CharBuf *wanted;
+    char buf[64];
+    unsigned long num = I32_MAX;
+    CharBuf *got = S_get_cb("foo ");
+
+    /* `num` is an unsigned long, so "%.8lx" is the matching conversion
+     * whatever the width of long.  Formatting unconditionally guarantees
+     * that `buf` is always initialised, which a size-based #if/#elif chain
+     * without an #else did not. */
+    sprintf(buf, "foo bar %.8lx baz", num);
+    wanted = CB_new_from_trusted_utf8(buf, strlen(buf));
+    CB_catf(got, "bar %x32 baz", (u32_t)num);
+    ASSERT_TRUE(batch, CB_Equals(wanted, (Obj*)got), "%%x32");
+    DECREF(wanted);
+    DECREF(got);
+}
+
+/* Entry point for the "TestCharBuf" batch: plan 47 assertions, run every
+ * test function in a fixed order, then destroy the batch.
+ */
+void
+TestCB_run_tests()
+{
+    typedef void (*S_test_func)(TestBatch *batch);
+
+    /* Execution order matters for the sequence of reported results. */
+    static const S_test_func tests[] = {
+        test_vcatf_s,
+        test_vcatf_null_string,
+        test_vcatf_cb,
+        test_vcatf_null_obj,
+        test_vcatf_i8,
+        test_vcatf_i32,
+        test_vcatf_i64,
+        test_vcatf_u8,
+        test_vcatf_u32,
+        test_vcatf_u64,
+        test_vcatf_f64,
+        test_vcatf_x32,
+        test_Cat,
+        test_Mimic_and_Clone,
+        test_Code_Point_At_and_From,
+        test_SubString,
+        test_Nip_and_Chop,
+        test_Truncate,
+        test_Trim,
+        test_To_F64,
+        test_To_I64
+    };
+    const size_t num_tests = sizeof(tests) / sizeof(tests[0]);
+    size_t       tick;
+    TestBatch   *batch = Test_new_batch("TestCharBuf", 47, NULL);
+
+    PLAN(batch);
+
+    for (tick = 0; tick < num_tests; tick++) {
+        tests[tick](batch);
+    }
+
+    batch->destroy(batch);
+}
+
+/* Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
Propchange: lucene/lucy/trunk/core/Lucy/Test/Object/TestCharBuf.c
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/lucy/trunk/perl/lib/Lucy/Object/CharBuf.pm
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/perl/lib/Lucy/Object/CharBuf.pm?rev=814956&view=auto
==============================================================================
--- lucene/lucy/trunk/perl/lib/Lucy/Object/CharBuf.pm (added)
+++ lucene/lucy/trunk/perl/lib/Lucy/Object/CharBuf.pm Tue Sep 15 01:29:17 2009
@@ -0,0 +1,82 @@
+# Perl-side stub for Lucy::Object::CharBuf.  The executable part only loads
+# Lucy; everything after __END__ is invisible to the Perl interpreter.
+use Lucy;
+
+1;
+
+__END__
+
+__BINDING__
+
+# XS glue, kept verbatim in a non-interpolating heredoc and handed to
+# Boilerplater::Binding::Perl::Class->register() below.
+# NOTE(review): presumably the build extracts and evaluates this __BINDING__
+# section -- confirm against the Boilerplater build code.
+#
+# Package Lucy::Object::CharBuf:
+#   new(either_sv, sv) - takes the UTF-8 bytes of sv (SvPVutf8), creates a
+#                        blank object via XSBind_new_blank_obj, inits it with
+#                        that size and appends the bytes via Cat_Trusted_Str.
+#   _clone(self)       - wraps the result of lucy_CB_clone.
+#   to_perl(self)      - returns XSBind_cb_to_sv(self).
+# Package Lucy::Object::ViewCharBuf:
+#   _new(unused, sv)   - wraps the UTF-8 bytes of sv using
+#                        lucy_ViewCB_new_from_trusted_utf8; the first argument
+#                        is ignored.
+my $xs_code = <<'END_XS_CODE';
+MODULE = Lucy PACKAGE = Lucy::Object::CharBuf
+
+SV*
+new(either_sv, sv)
+ SV *either_sv;
+ SV *sv;
+CODE:
+{
+ STRLEN size;
+ char *ptr = SvPVutf8(sv, size);
+ lucy_CharBuf *self = (lucy_CharBuf*)XSBind_new_blank_obj(either_sv);
+ lucy_CB_init(self, size);
+ Lucy_CB_Cat_Trusted_Str(self, ptr, size);
+ RETVAL = LUCY_OBJ_TO_SV_NOINC(self);
+}
+OUTPUT: RETVAL
+
+SV*
+_clone(self)
+ lucy_CharBuf *self;
+CODE:
+ RETVAL = LUCY_OBJ_TO_SV_NOINC(lucy_CB_clone(self));
+OUTPUT: RETVAL
+
+SV*
+to_perl(self)
+ lucy_CharBuf *self;
+CODE:
+ RETVAL = XSBind_cb_to_sv(self);
+OUTPUT: RETVAL
+
+MODULE = Lucy PACKAGE = Lucy::Object::ViewCharBuf
+
+SV*
+_new(unused, sv)
+ SV *unused;
+ SV *sv;
+CODE:
+{
+ STRLEN size;
+ char *ptr = SvPVutf8(sv, size);
+ lucy_ViewCharBuf *self
+ = lucy_ViewCB_new_from_trusted_utf8(ptr, size);
+ CHY_UNUSED_VAR(unused);
+ RETVAL = LUCY_OBJ_TO_SV_NOINC(self);
+}
+OUTPUT: RETVAL
+END_XS_CODE
+
+# Register the XS code under the CharBuf class.  The heredoc also carries the
+# ViewCharBuf package, so both are registered through this one call.
+Boilerplater::Binding::Perl::Class->register(
+ parcel => "Lucy",
+ class_name => "Lucy::Object::CharBuf",
+ xs_code => $xs_code,
+);
+
+__COPYRIGHT__
+
+ /**
+ * Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
Propchange: lucene/lucy/trunk/perl/lib/Lucy/Object/CharBuf.pm
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/lucy/trunk/perl/t/core/029-charbuf.t
URL: http://svn.apache.org/viewvc/lucene/lucy/trunk/perl/t/core/029-charbuf.t?rev=814956&view=auto
==============================================================================
--- lucene/lucy/trunk/perl/t/core/029-charbuf.t (added)
+++ lucene/lucy/trunk/perl/t/core/029-charbuf.t Tue Sep 15 01:29:17 2009
@@ -0,0 +1,6 @@
+# Test script: runs the "TestCharBuf" batch through Lucy::Test::run_tests.
+use strict;
+use warnings;
+
+use Lucy::Test;
+Lucy::Test::run_tests("TestCharBuf");
+
Propchange: lucene/lucy/trunk/perl/t/core/029-charbuf.t
------------------------------------------------------------------------------
svn:eol-style = native