You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by nw...@apache.org on 2013/09/06 00:11:22 UTC
[lucy-commits] [3/4] git commit: refs/heads/cfish-string-prep1 - Implement some string methods with StringIterators
Implement some string methods with StringIterators
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/52bab257
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/52bab257
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/52bab257
Branch: refs/heads/cfish-string-prep1
Commit: 52bab257c5592f5bde2f4bae72df5d60d8908af1
Parents: f7edbe5
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Thu Sep 5 00:08:18 2013 +0200
Committer: Nick Wellnhofer <we...@aevum.de>
Committed: Thu Sep 5 00:08:18 2013 +0200
----------------------------------------------------------------------
clownfish/runtime/core/Clownfish/String.c | 120 ++++++++-----------
.../runtime/core/Clownfish/Test/TestString.c | 39 +++++-
2 files changed, 88 insertions(+), 71 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucy/blob/52bab257/clownfish/runtime/core/Clownfish/String.c
----------------------------------------------------------------------
diff --git a/clownfish/runtime/core/Clownfish/String.c b/clownfish/runtime/core/Clownfish/String.c
index 10dac38..f6f1a85 100644
--- a/clownfish/runtime/core/Clownfish/String.c
+++ b/clownfish/runtime/core/Clownfish/String.c
@@ -162,12 +162,12 @@ Str_Destroy_IMP(String *self) {
int32_t
Str_Hash_Sum_IMP(String *self) {
uint32_t hashvalue = 5381;
- StackString *iterator = SSTR_WRAP(self);
+ StackStringIterator *iter = STR_STACKTOP(self);
- const ViewCB_Nibble_t nibble = METHOD_PTR(iterator->vtable,
- CFISH_ViewCB_Nibble);
- while (iterator->size) {
- uint32_t code_point = (uint32_t)nibble((ViewCharBuf*)iterator);
+ const StrIter_Next_t next
+ = METHOD_PTR(STRINGITERATOR, CFISH_StrIter_Next);
+ uint32_t code_point;
+ while (STRITER_DONE != (code_point = next((StringIterator*)iter))) {
hashvalue = ((hashvalue << 5) + hashvalue) ^ code_point;
}
@@ -245,19 +245,19 @@ Str_To_I64_IMP(String *self) {
int64_t
Str_BaseX_To_I64_IMP(String *self, uint32_t base) {
- StackString *iterator = SSTR_WRAP(self);
+ StackStringIterator *iter = STR_STACKTOP(self);
int64_t retval = 0;
bool is_negative = false;
+ uint32_t code_point = SStrIter_Next(iter);
// Advance past minus sign.
- if (SStr_Code_Point_At(iterator, 0) == '-') {
- SStr_Nibble(iterator);
+ if (code_point == '-') {
+ code_point = SStrIter_Next(iter);
is_negative = true;
}
// Accumulate.
- while (iterator->size) {
- int32_t code_point = SStr_Nibble(iterator);
+ while (code_point != STRITER_DONE) {
if (isalnum(code_point)) {
int32_t addend = isdigit(code_point)
? code_point - '0'
@@ -269,6 +269,7 @@ Str_BaseX_To_I64_IMP(String *self, uint32_t base) {
else {
break;
}
+ code_point = SStrIter_Next(iter);
}
// Apply minus sign.
@@ -415,14 +416,14 @@ Str_Find_IMP(String *self, const String *substring) {
int64_t
Str_Find_Str_IMP(String *self, const char *ptr, size_t size) {
- StackString *iterator = SSTR_WRAP(self);
+ StackStringIterator *iter = STR_STACKTOP(self);
int64_t location = 0;
- while (iterator->size) {
- if (SStr_Starts_With_Str(iterator, ptr, size)) {
+ while (iter->byte_offset + size <= self->size) {
+ if (memcmp(self->ptr + iter->byte_offset, ptr, size) == 0) {
return location;
}
- SStr_Nip(iterator, 1);
+ SStrIter_Advance(iter, 1);
location++;
}
@@ -479,17 +480,8 @@ Str_Trim_Tail_IMP(String *self) {
size_t
Str_Length_IMP(String *self) {
- size_t len = 0;
- char *ptr = self->ptr;
- char *end = ptr + self->size;
- while (ptr < end) {
- ptr += StrHelp_UTF8_COUNT[*(uint8_t*)ptr];
- len++;
- }
- if (ptr != end) {
- DIE_INVALID_UTF8(self->ptr, self->size);
- }
- return len;
+ StackStringIterator *iter = STR_STACKTOP(self);
+ return SStrIter_Advance(iter, SIZE_MAX);
}
size_t
@@ -503,45 +495,31 @@ Str_Truncate_IMP(String *self, size_t count) {
uint32_t
Str_Code_Point_At_IMP(String *self, size_t tick) {
- size_t count = 0;
- char *ptr = self->ptr;
- char *const end = ptr + self->size;
-
- for (; ptr < end; ptr += StrHelp_UTF8_COUNT[*(uint8_t*)ptr]) {
- if (count == tick) {
- if (ptr > end) {
- DIE_INVALID_UTF8(self->ptr, self->size);
- }
- return StrHelp_decode_utf8_char(ptr);
- }
- count++;
- }
-
- return 0;
+ StackStringIterator *iter = STR_STACKTOP(self);
+ SStrIter_Advance(iter, tick);
+ uint32_t code_point = SStrIter_Next(iter);
+ return code_point == STRITER_DONE ? 0 : code_point;
}
uint32_t
Str_Code_Point_From_IMP(String *self, size_t tick) {
- size_t count = 0;
- char *top = self->ptr;
- const char *ptr = top + self->size;
-
- for (count = 0; count < tick; count++) {
- if (NULL == (ptr = StrHelp_back_utf8_char(ptr, top))) { return 0; }
- }
- return StrHelp_decode_utf8_char(ptr);
+ if (tick == 0) { return 0; }
+ StackStringIterator *iter = STR_STACKTAIL(self);
+ SStrIter_Recede(iter, tick - 1);
+ uint32_t code_point = SStrIter_Prev(iter);
+ return code_point == STRITER_DONE ? 0 : code_point;
}
String*
Str_SubString_IMP(String *self, size_t offset, size_t len) {
- StackString *iterator = SSTR_WRAP(self);
- char *sub_start;
- size_t byte_len;
+ StackStringIterator *iter = STR_STACKTOP(self);
- SStr_Nip(iterator, offset);
- sub_start = iterator->ptr;
- SStr_Nip(iterator, len);
- byte_len = iterator->ptr - sub_start;
+ SStrIter_Advance(iter, offset);
+ int start_offset = iter->byte_offset;
+ char *sub_start = self->ptr + start_offset;
+
+ SStrIter_Advance(iter, len);
+ size_t byte_len = iter->byte_offset - start_offset;
return Str_new_from_trusted_utf8(sub_start, byte_len);
}
@@ -550,23 +528,31 @@ int
Str_compare(const void *va, const void *vb) {
const String *a = *(const String**)va;
const String *b = *(const String**)vb;
- StackString *iterator_a = SSTR_WRAP(a);
- StackString *iterator_b = SSTR_WRAP(b);
- while (iterator_a->size && iterator_b->size) {
- int32_t code_point_a = SStr_Nibble(iterator_a);
- int32_t code_point_b = SStr_Nibble(iterator_b);
- const int32_t comparison = code_point_a - code_point_b;
- if (comparison != 0) { return comparison; }
- }
- if (iterator_a->size != iterator_b->size) {
- return iterator_a->size < iterator_b->size ? -1 : 1;
+
+ StackStringIterator *iter_a = STR_STACKTOP(a);
+ StackStringIterator *iter_b = STR_STACKTOP(b);
+
+ while (true) {
+ uint32_t code_point_a = SStrIter_Next(iter_a);
+ uint32_t code_point_b = SStrIter_Next(iter_b);
+
+ if (code_point_a == STRITER_DONE) {
+ return code_point_b == STRITER_DONE ? 0 : -1;
+ }
+ if (code_point_b == STRITER_DONE) {
+ return 1;
+ }
+ if (code_point_a != code_point_b) {
+ return code_point_a < code_point_b ? -1 : 1;
+ }
}
- return 0;
+
+ UNREACHABLE_RETURN(int);
}
bool
Str_less_than(const void *va, const void *vb) {
- return Str_compare(va, vb) < 0 ? 1 : 0;
+ return Str_compare(va, vb) < 0 ? true : false;
}
void
http://git-wip-us.apache.org/repos/asf/lucy/blob/52bab257/clownfish/runtime/core/Clownfish/Test/TestString.c
----------------------------------------------------------------------
diff --git a/clownfish/runtime/core/Clownfish/Test/TestString.c b/clownfish/runtime/core/Clownfish/Test/TestString.c
index 46bc62d..eced59f 100644
--- a/clownfish/runtime/core/Clownfish/Test/TestString.c
+++ b/clownfish/runtime/core/Clownfish/Test/TestString.c
@@ -152,11 +152,11 @@ test_Code_Point_At_and_From(TestBatchRunner *runner) {
uint32_t i;
for (i = 0; i < num_code_points; i++) {
- uint32_t from = num_code_points - i - 1;
+ uint32_t from = num_code_points - i;
TEST_INT_EQ(runner, Str_Code_Point_At(string, i), code_points[i],
"Code_Point_At %ld", (long)i);
- TEST_INT_EQ(runner, Str_Code_Point_At(string, from),
- code_points[from], "Code_Point_From %ld", (long)from);
+ TEST_INT_EQ(runner, Str_Code_Point_From(string, from),
+ code_points[i], "Code_Point_From %ld", (long)from);
}
DECREF(string);
@@ -269,6 +269,35 @@ test_To_I64(TestBatchRunner *runner) {
}
static void
+test_Length(TestBatchRunner *runner) {
+ String *string = Str_newf("a%s%sb%sc", smiley, smiley, smiley);
+ TEST_INT_EQ(runner, Str_Length(string), 6, "Length");
+ DECREF(string);
+}
+
+static void
+test_Compare_To(TestBatchRunner *runner) {
+ String *abc = Str_newf("a%s%sb%sc", smiley, smiley, smiley);
+ String *ab = Str_newf("a%s%sb", smiley, smiley);
+ String *ac = Str_newf("a%s%sc", smiley, smiley);
+
+ TEST_TRUE(runner, Str_Compare_To(abc, (Obj*)abc) == 0,
+ "Compare_To abc abc");
+ TEST_TRUE(runner, Str_Compare_To(ab, (Obj*)abc) < 0,
+ "Compare_To ab abc");
+ TEST_TRUE(runner, Str_Compare_To(abc, (Obj*)ab) > 0,
+ "Compare_To abc ab");
+ TEST_TRUE(runner, Str_Compare_To(ab, (Obj*)ac) < 0,
+ "Compare_To ab ac");
+ TEST_TRUE(runner, Str_Compare_To(ac, (Obj*)ab) > 0,
+ "Compare_To ac ab");
+
+ DECREF(ac);
+ DECREF(ab);
+ DECREF(abc);
+}
+
+static void
test_iterator(TestBatchRunner *runner) {
static const uint32_t code_points[] = {
0x41,
@@ -442,7 +471,7 @@ test_iterator_substring(TestBatchRunner *runner) {
void
TestStr_Run_IMP(TestString *self, TestBatchRunner *runner) {
- TestBatchRunner_Plan(runner, (TestBatch*)self, 96);
+ TestBatchRunner_Plan(runner, (TestBatch*)self, 102);
test_Cat(runner);
test_Mimic_and_Clone(runner);
test_Code_Point_At_and_From(runner);
@@ -453,6 +482,8 @@ TestStr_Run_IMP(TestString *self, TestBatchRunner *runner) {
test_Trim(runner);
test_To_F64(runner);
test_To_I64(runner);
+ test_Length(runner);
+ test_Compare_To(runner);
test_iterator(runner);
test_iterator_whitespace(runner);
test_iterator_substring(runner);