You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by nw...@apache.org on 2013/09/06 00:11:21 UTC
[lucy-commits] [2/4] git commit: refs/heads/cfish-string-prep1 - Initial
implementation of StringIterator
Initial implementation of StringIterator
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/f7edbe56
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/f7edbe56
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/f7edbe56
Branch: refs/heads/cfish-string-prep1
Commit: f7edbe56b38789de10c6ed34faec7836919ad8c5
Parents: cfea9e6
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Wed Sep 4 22:42:00 2013 +0200
Committer: Nick Wellnhofer <we...@aevum.de>
Committed: Wed Sep 4 22:48:21 2013 +0200
----------------------------------------------------------------------
clownfish/runtime/core/Clownfish/String.c | 314 +++++++++++++++++++
clownfish/runtime/core/Clownfish/String.cfh | 121 +++++++
.../runtime/core/Clownfish/Test/TestString.c | 220 +++++++++++--
3 files changed, 636 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucy/blob/f7edbe56/clownfish/runtime/core/Clownfish/String.c
----------------------------------------------------------------------
diff --git a/clownfish/runtime/core/Clownfish/String.c b/clownfish/runtime/core/Clownfish/String.c
index 07da5d8..10dac38 100644
--- a/clownfish/runtime/core/Clownfish/String.c
+++ b/clownfish/runtime/core/Clownfish/String.c
@@ -17,6 +17,8 @@
#define C_CFISH_STRING
#define C_CFISH_VIEWCHARBUF
#define C_CFISH_STACKSTRING
+#define C_CFISH_STRINGITERATOR
+#define C_CFISH_STACKSTRINGITERATOR
#define CFISH_USE_SHORT_NAMES
#define CHY_USE_SHORT_NAMES
@@ -35,6 +37,11 @@
#include "Clownfish/Util/Memory.h"
#include "Clownfish/Util/StringHelper.h"
+#define STR_STACKTOP(string) \
+ Str_StackTop(string, alloca(sizeof(StackStringIterator)))
+#define STR_STACKTAIL(string) \
+ Str_StackTail(string, alloca(sizeof(StackStringIterator)))
+
// Helper function for throwing invalid UTF-8 error. Since THROW uses
// a String internally, calling THROW with invalid UTF-8 would create an
// infinite loop -- so we fwrite some of the bogus text to stderr and
@@ -577,6 +584,26 @@ Str_Get_Ptr8_IMP(String *self) {
return (uint8_t*)self->ptr;
}
+StringIterator*
+Str_Top_IMP(String *self) {
+ return StrIter_new(self, 0);
+}
+
+StringIterator*
+Str_Tail_IMP(String *self) {
+ return StrIter_new(self, self->size);
+}
+
+StackStringIterator*
+Str_StackTop_IMP(String *self, void *allocation) {
+ return SStrIter_new(allocation, self, 0);
+}
+
+StackStringIterator*
+Str_StackTail_IMP(String *self, void *allocation) {
+ return SStrIter_new(allocation, self, self->size);
+}
+
/*****************************************************************/
ViewCharBuf*
@@ -765,4 +792,291 @@ SStr_Destroy_IMP(StackString *self) {
THROW(ERR, "Can't destroy a StackString ('%o')", self);
}
+/*****************************************************************/
+
+StringIterator*
+StrIter_new(String *string, size_t byte_offset) {
+ StringIterator *self = (StringIterator*)VTable_Make_Obj(STRINGITERATOR);
+ self->string = (String*)INCREF(string);
+ self->byte_offset = byte_offset;
+ return self;
+}
+
+String*
+StrIter_substring(StringIterator *top, StringIterator *tail) {
+ String *string = top->string;
+
+ if (string != tail->string) {
+ THROW(ERR, "StrIter_substring: strings don't match");
+ }
+ if (top->byte_offset > tail->byte_offset) {
+ THROW(ERR, "StrIter_substring: top is behind tail");
+ }
+ if (tail->byte_offset > string->size) {
+ THROW(ERR, "Invalid StringIterator offset");
+ }
+
+ return Str_new_from_trusted_utf8(string->ptr + top->byte_offset,
+ tail->byte_offset - top->byte_offset);
+}
+
+Obj*
+StrIter_Clone_IMP(StringIterator *self) {
+ return (Obj*)StrIter_new(self->string, self->byte_offset);
+}
+
+void
+StrIter_Assign_IMP(StringIterator *self, StringIterator *other) {
+ if (self->string != other->string) {
+ DECREF(self->string);
+ self->string = (String*)INCREF(other->string);
+ }
+ self->byte_offset = other->byte_offset;
+}
+
+bool
+StrIter_Equals_IMP(StringIterator *self, Obj *other) {
+ StringIterator *const twin = (StringIterator*)other;
+ if (twin == self) { return true; }
+ if (!Obj_Is_A(other, STRINGITERATOR)) { return false; }
+ return self->string == twin->string
+ && self->byte_offset == twin->byte_offset;
+}
+
+int32_t
+StrIter_Compare_To_IMP(StringIterator *self, Obj *other) {
+ StringIterator *twin = (StringIterator*)CERTIFY(other, STRINGITERATOR);
+ if (self->string != twin->string) {
+ THROW(ERR, "Can't compare iterators of different strings");
+ }
+ if (self->byte_offset < twin->byte_offset) { return -1; }
+ if (self->byte_offset > twin->byte_offset) { return 1; }
+ return 0;
+}
+
+bool
+StrIter_Has_Next_IMP(StringIterator *self) {
+ return self->byte_offset < self->string->size;
+}
+
+bool
+StrIter_Has_Prev_IMP(StringIterator *self) {
+ return self->byte_offset != 0;
+}
+
+uint32_t
+StrIter_Next_IMP(StringIterator *self) {
+ String *string = self->string;
+ size_t byte_offset = self->byte_offset;
+ size_t size = string->size;
+
+ if (byte_offset >= size) { return STRITER_DONE; }
+
+ const uint8_t *const ptr = (const uint8_t*)string->ptr;
+ uint32_t retval = ptr[byte_offset++];
+
+ if (retval >= 0x80) {
+ /*
+ * The 'mask' bit is tricky. In each iteration, 'retval' is
+ * left-shifted by 6 and 'mask' by 5 bits. So relative to the first
+ * byte of the sequence, 'mask' moves one bit to the right.
+ *
+ * The possible outcomes after the loop are:
+ *
+ * Two byte sequence
+ * retval: 110aaaaa bbbbbb
+ * mask: 00100000 000000
+ *
+ * Three byte sequence
+ * retval: 1110aaaa bbbbbb cccccc
+ * mask: 00010000 000000 000000
+ *
+ * Four byte sequence
+ * retval: 11110aaa bbbbbb cccccc dddddd
+ * mask: 00001000 000000 000000 000000
+ *
+ * This also illustrates why the exit condition (retval & mask)
+ * works. After the first iteration, the third most significant bit
+ * is tested. After the second iteration, the fourth, and so on.
+ */
+
+ uint32_t mask = 1 << 6;
+
+ do {
+ if (byte_offset >= size) {
+ THROW(ERR, "StrIter_Next: Invalid UTF-8");
+ }
+
+ retval = (retval << 6) | (ptr[byte_offset++] & 0x3F);
+ mask <<= 5;
+ } while (retval & mask);
+
+ retval &= mask - 1;
+ }
+
+ self->byte_offset = byte_offset;
+ return retval;
+}
+
+uint32_t
+StrIter_Prev_IMP(StringIterator *self) {
+ size_t byte_offset = self->byte_offset;
+ // Decode the code point ending just before the iterator and step back.
+ if (byte_offset == 0) { return STRITER_DONE; }
+
+ const uint8_t *const ptr = (const uint8_t*)self->string->ptr;
+ uint32_t retval = ptr[--byte_offset];
+
+ if (retval >= 0x80) {
+ // Multi-byte sequence: construct the result from right to left,
+ // starting with the trailing byte that is already in 'retval'.
+ if (byte_offset == 0) {
+ THROW(ERR, "StrIter_Prev: Invalid UTF-8");
+ }
+
+ retval &= 0x3F;
+ int shift = 6;
+ uint32_t first_byte_mask = 0x1F;
+ uint32_t byte = ptr[--byte_offset];
+
+ while ((byte & 0xC0) == 0x80) {
+ // A 4th continuation byte is invalid and would overflow 'shift'.
+ if (byte_offset == 0 || shift >= 18) {
+ THROW(ERR, "StrIter_Prev: Invalid UTF-8");
+ }
+ retval |= (byte & 0x3F) << shift;
+ shift += 6;
+ first_byte_mask >>= 1;
+ byte = ptr[--byte_offset];
+ }
+
+ retval |= (byte & first_byte_mask) << shift;
+ }
+
+ self->byte_offset = byte_offset;
+ return retval;
+}
+
+size_t
+StrIter_Advance_IMP(StringIterator *self, size_t num) {
+ size_t num_skipped = 0;
+ size_t byte_offset = self->byte_offset;
+ size_t size = self->string->size;
+ const uint8_t *const ptr = (const uint8_t*)self->string->ptr;
+
+ while (num_skipped < num) {
+ if (byte_offset >= size) {
+ break;
+ }
+ uint8_t first_byte = ptr[byte_offset];
+ byte_offset += StrHelp_UTF8_COUNT[first_byte];
+ ++num_skipped;
+ }
+
+ if (byte_offset > size) {
+ THROW(ERR, "StrIter_Advance: Invalid UTF-8");
+ }
+
+ self->byte_offset = byte_offset;
+ return num_skipped;
+}
+
+size_t
+StrIter_Recede_IMP(StringIterator *self, size_t num) {
+ size_t num_skipped = 0;
+ size_t byte_offset = self->byte_offset;
+ const uint8_t *const ptr = (const uint8_t*)self->string->ptr;
+
+ while (num_skipped < num) {
+ if (byte_offset == 0) {
+ break;
+ }
+
+ uint8_t byte;
+ do {
+ if (byte_offset == 0) {
+ THROW(ERR, "StrIter_Recede: Invalid UTF-8");
+ }
+
+ byte = ptr[--byte_offset];
+ } while ((byte & 0xC0) == 0x80);
+ ++num_skipped;
+ }
+
+ self->byte_offset = byte_offset;
+ return num_skipped;
+}
+
+size_t
+StrIter_Skip_Next_Whitespace_IMP(StringIterator *self) {
+ size_t num_skipped = 0;
+ size_t byte_offset = self->byte_offset;
+ uint32_t code_point;
+
+ while (STRITER_DONE != (code_point = StrIter_Next(self))) {
+ if (!StrHelp_is_whitespace(code_point)) { break; }
+ byte_offset = self->byte_offset;
+ ++num_skipped;
+ }
+
+ self->byte_offset = byte_offset;
+ return num_skipped;
+}
+
+size_t
+StrIter_Skip_Prev_Whitespace_IMP(StringIterator *self) {
+ size_t num_skipped = 0;
+ size_t byte_offset = self->byte_offset;
+ uint32_t code_point;
+
+ while (STRITER_DONE != (code_point = StrIter_Prev(self))) {
+ if (!StrHelp_is_whitespace(code_point)) { break; }
+ byte_offset = self->byte_offset;
+ ++num_skipped;
+ }
+
+ self->byte_offset = byte_offset;
+ return num_skipped;
+}
+
+bool
+StrIter_Starts_With_IMP(StringIterator *self, String *prefix) {
+ String *string = self->string;
+ size_t byte_offset = self->byte_offset;
+
+ if (byte_offset > string->size) {
+ THROW(ERR, "Invalid StringIterator offset");
+ }
+
+ if (string->size - byte_offset < prefix->size) { return false; }
+
+ const char *ptr = string->ptr + byte_offset;
+ return memcmp(ptr, prefix->ptr, prefix->size) == 0;
+}
+
+void
+StrIter_Destroy_IMP(StringIterator *self) {
+ DECREF(self->string);
+ SUPER_DESTROY(self, STRINGITERATOR);
+}
+
+/*****************************************************************/
+
+StackStringIterator*
+SStrIter_new(void *allocation, String *string, size_t byte_offset) {
+ StackStringIterator *self
+ = (StackStringIterator*)VTable_Init_Obj(STACKSTRINGITERATOR,
+ allocation);
+ // Assume that the string will be available for the lifetime of the
+ // iterator and don't increase its refcount.
+ self->string = string;
+ self->byte_offset = byte_offset;
+ return self;
+}
+
+void
+SStrIter_Destroy_IMP(StackStringIterator *self) {
+ THROW(ERR, "Can't destroy a StackStringIterator");
+}
+
http://git-wip-us.apache.org/repos/asf/lucy/blob/f7edbe56/clownfish/runtime/core/Clownfish/String.cfh
----------------------------------------------------------------------
diff --git a/clownfish/runtime/core/Clownfish/String.cfh b/clownfish/runtime/core/Clownfish/String.cfh
index f56b26c..d6d378a 100644
--- a/clownfish/runtime/core/Clownfish/String.cfh
+++ b/clownfish/runtime/core/Clownfish/String.cfh
@@ -270,6 +270,26 @@ class Clownfish::String cnick Str
*/
incremented String*
SubString(String *self, size_t offset, size_t len);
+
+ /** Return an iterator to the start of the string.
+ */
+ incremented StringIterator*
+ Top(String *self);
+
+ /** Return an iterator to the end of the string.
+ */
+ incremented StringIterator*
+ Tail(String *self);
+
+ /** Return a stack iterator to the start of the string.
+ */
+ incremented StackStringIterator*
+ StackTop(String *self, void *allocation);
+
+ /** Return a stack iterator to the end of the string.
+ */
+ incremented StackStringIterator*
+ StackTail(String *self, void *allocation);
}
class Clownfish::ViewCharBuf cnick ViewCB
@@ -356,6 +376,104 @@ class Clownfish::StackString cnick SStr
Destroy(StackString *self);
}
+class Clownfish::StringIterator cnick StrIter
+ inherits Clownfish::Obj {
+
+ String *string;
+ size_t byte_offset;
+
+ inert incremented StringIterator*
+ new(String *string, size_t byte_offset);
+
+ /** Return the substring between the top and tail iterators.
+ * @param top Iterator marking the start of the substring.
+ * @param tail Iterator marking the end of the substring.
+ */
+ inert incremented String*
+ substring(StringIterator *top, StringIterator *tail);
+
+ public incremented Obj*
+ Clone(StringIterator *self);
+
+ public void
+ Assign(StringIterator *self, StringIterator *other);
+
+ public bool
+ Equals(StringIterator *self, Obj *other);
+
+ public int32_t
+ Compare_To(StringIterator *self, Obj *other);
+
+ /** Return true if the iterator is not at the end of the string.
+ */
+ public bool
+ Has_Next(StringIterator *self);
+
+ /** Return true if the iterator is not at the start of the string.
+ */
+ public bool
+ Has_Prev(StringIterator *self);
+
+ /** Return the code point after the current position and advance the
+ * iterator. Return CFISH_STRITER_DONE at the end of the string.
+ */
+ public uint32_t
+ Next(StringIterator *self);
+
+ /** Return the code point before the current position and go one step back.
+ * Return CFISH_STRITER_DONE at the start of the string.
+ */
+ public uint32_t
+ Prev(StringIterator *self);
+
+ /** Skip code points.
+ * @param num The number of code points to skip.
+ * @return the number of code points actually skipped. This can be less
+ * than the requested number if the end of the string is reached.
+ */
+ public size_t
+ Advance(StringIterator *self, size_t num);
+
+ /** Skip code points backward.
+ * @param num The number of code points to skip.
+ * @return the number of code points actually skipped. This can be less
+ * than the requested number if the start of the string is reached.
+ */
+ public size_t
+ Recede(StringIterator *self, size_t num);
+
+ /** Skip whitespace.
+ * @return the number of code points skipped.
+ */
+ public size_t
+ Skip_Next_Whitespace(StringIterator *self);
+
+ /** Skip whitespace backward.
+ * @return the number of code points skipped.
+ */
+ public size_t
+ Skip_Prev_Whitespace(StringIterator *self);
+
+ /** Test whether the content after the iterator starts with
+ * <code>prefix</code>.
+ */
+ bool
+ Starts_With(StringIterator *self, String *prefix);
+
+ public void
+ Destroy(StringIterator *self);
+}
+
+class Clownfish::StackStringIterator cnick SStrIter
+ inherits Clownfish::StringIterator {
+
+ inert incremented StackStringIterator*
+ new(void *allocation, String *string, size_t byte_offset);
+
+ public void
+ Destroy(StackStringIterator *self);
+}
+
__C__
#define CFISH_SStr_BLANK() cfish_SStr_new(cfish_alloca(cfish_SStr_size()))
@@ -366,10 +484,13 @@ __C__
#define CFISH_SStr_WRAP_STR(ptr, size) \
cfish_SStr_wrap_str(cfish_alloca(cfish_SStr_size()), ptr, size)
+#define CFISH_STRITER_DONE UINT32_MAX
+
#ifdef CFISH_USE_SHORT_NAMES
#define SStr_BLANK CFISH_SStr_BLANK
#define SSTR_WRAP CFISH_SStr_WRAP
#define SSTR_WRAP_STR CFISH_SStr_WRAP_STR
+ #define STRITER_DONE CFISH_STRITER_DONE
#endif
__END_C__
http://git-wip-us.apache.org/repos/asf/lucy/blob/f7edbe56/clownfish/runtime/core/Clownfish/Test/TestString.c
----------------------------------------------------------------------
diff --git a/clownfish/runtime/core/Clownfish/Test/TestString.c b/clownfish/runtime/core/Clownfish/Test/TestString.c
index 5f8337c..46bc62d 100644
--- a/clownfish/runtime/core/Clownfish/Test/TestString.c
+++ b/clownfish/runtime/core/Clownfish/Test/TestString.c
@@ -46,6 +46,30 @@ S_get_str(const char *string) {
return Str_new_from_utf8(string, strlen(string));
}
+// Surround a smiley with lots of whitespace. If `num_spaces_ptr` is not
+// NULL, it receives the number of whitespace code points on each side.
+static String*
+S_smiley_with_whitespace(int *num_spaces_ptr) {
+ uint32_t spaces[] = {
+ ' ', '\t', '\r', '\n', 0x000B, 0x000C, 0x000D, 0x0085,
+ 0x00A0, 0x1680, 0x180E, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004,
+ 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x2028, 0x2029,
+ 0x202F, 0x205F, 0x3000
+ };
+ int num_spaces = (int)(sizeof(spaces) / sizeof(spaces[0]));
+
+ CharBuf *buf = CB_new(0);
+ for (int i = 0; i < num_spaces; i++) { CB_Cat_Char(buf, spaces[i]); }
+ CB_Cat_Char(buf, 0x263A);
+ for (int i = 0; i < num_spaces; i++) { CB_Cat_Char(buf, spaces[i]); }
+
+ String *retval = CB_To_String(buf);
+ if (num_spaces_ptr) { *num_spaces_ptr = num_spaces; }
+
+ DECREF(buf);
+ return retval;
+}
+
static void
test_Cat(TestBatchRunner *runner) {
String *wanted = Str_newf("a%s", smiley);
@@ -185,23 +209,9 @@ test_Truncate(TestBatchRunner *runner) {
static void
test_Trim(TestBatchRunner *runner) {
- uint32_t spaces[] = {
- ' ', '\t', '\r', '\n', 0x000B, 0x000C, 0x000D, 0x0085,
- 0x00A0, 0x1680, 0x180E, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004,
- 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x2028, 0x2029,
- 0x202F, 0x205F, 0x3000
- };
- uint32_t num_spaces = sizeof(spaces) / sizeof(uint32_t);
- uint32_t i;
String *got;
- // Surround a smiley with lots of whitespace.
- CharBuf *buf = CB_new(0);
- for (i = 0; i < num_spaces; i++) { CB_Cat_Char(buf, spaces[i]); }
- CB_Cat_Char(buf, 0x263A);
- for (i = 0; i < num_spaces; i++) { CB_Cat_Char(buf, spaces[i]); }
-
- got = CB_To_String(buf);
+ got = S_smiley_with_whitespace(NULL);
TEST_TRUE(runner, Str_Trim_Top(got), "Trim_Top returns true on success");
TEST_FALSE(runner, Str_Trim_Top(got),
"Trim_Top returns false on failure");
@@ -212,14 +222,12 @@ test_Trim(TestBatchRunner *runner) {
"Trim_Top and Trim_Tail worked");
DECREF(got);
- got = CB_To_String(buf);
+ got = S_smiley_with_whitespace(NULL);
TEST_TRUE(runner, Str_Trim(got), "Trim returns true on success");
TEST_FALSE(runner, Str_Trim(got), "Trim returns false on failure");
TEST_TRUE(runner, Str_Equals_Str(got, smiley, smiley_len),
"Trim worked");
DECREF(got);
-
- DECREF(buf);
}
static void
@@ -260,10 +268,181 @@ test_To_I64(TestBatchRunner *runner) {
DECREF(string);
}
+static void
+test_iterator(TestBatchRunner *runner) {
+ static const uint32_t code_points[] = {
+ 0x41,
+ 0x7F,
+ 0x80,
+ 0x7FF,
+ 0x800,
+ 0xFFFF,
+ 0x10000,
+ 0x10FFFF
+ };
+ static const size_t num_code_points
+ = sizeof(code_points) / sizeof(code_points[0]);
+ // Concatenate all code points into a single test string.
+ CharBuf *buf = CB_new(0);
+ for (int i = 0; i < num_code_points; ++i) {
+ CB_Cat_Char(buf, code_points[i]);
+ }
+ String *string = CB_To_String(buf);
+ // Compare_To, Clone, Assign and Equals on top/tail iterators.
+ {
+ StringIterator *top = Str_Top(string);
+ StringIterator *tail = Str_Tail(string);
+
+ TEST_INT_EQ(runner, StrIter_Compare_To(top, (Obj*)tail), -1,
+ "Compare_To top < tail");
+ TEST_INT_EQ(runner, StrIter_Compare_To(tail, (Obj*)top), 1,
+ "Compare_To tail > top");
+ TEST_INT_EQ(runner, StrIter_Compare_To(top, (Obj*)top), 0,
+ "Compare_To top == top");
+
+ StringIterator *clone = (StringIterator*)StrIter_Clone(top);
+ TEST_TRUE(runner, StrIter_Equals(clone, (Obj*)top), "Clone");
+
+ StrIter_Assign(clone, tail);
+ TEST_TRUE(runner, StrIter_Equals(clone, (Obj*)tail), "Assign");
+
+ DECREF(clone);
+ DECREF(top);
+ DECREF(tail);
+ }
+ // Forward iteration with Has_Next/Next must yield every code point.
+ {
+ StringIterator *iter = Str_Top(string);
+
+ for (int i = 0; i < num_code_points; ++i) {
+ TEST_TRUE(runner, StrIter_Has_Next(iter), "Has_Next %d", i);
+ uint32_t code_point = StrIter_Next(iter);
+ TEST_INT_EQ(runner, code_point, code_points[i], "Next %d", i);
+ }
+
+ TEST_TRUE(runner, !StrIter_Has_Next(iter),
+ "Has_Next at end of string");
+ TEST_INT_EQ(runner, StrIter_Next(iter), STRITER_DONE,
+ "Next at end of string");
+
+ StringIterator *tail = Str_Tail(string);
+ TEST_TRUE(runner, StrIter_Equals(iter, (Obj*)tail), "Equals tail");
+
+ DECREF(tail);
+ DECREF(iter);
+ }
+ // Backward iteration with Has_Prev/Prev must yield them in reverse.
+ {
+ StringIterator *iter = Str_Tail(string);
+
+ for (int i = num_code_points - 1; i >= 0; --i) {
+ TEST_TRUE(runner, StrIter_Has_Prev(iter), "Has_Prev %d", i);
+ uint32_t code_point = StrIter_Prev(iter);
+ TEST_INT_EQ(runner, code_point, code_points[i], "Prev %d", i);
+ }
+
+ TEST_TRUE(runner, !StrIter_Has_Prev(iter),
+ "Has_Prev at start of string");
+ TEST_INT_EQ(runner, StrIter_Prev(iter), STRITER_DONE,
+ "Prev at start of string");
+
+ StringIterator *top = Str_Top(string);
+ TEST_TRUE(runner, StrIter_Equals(iter, (Obj*)top), "Equals top");
+
+ DECREF(top);
+ DECREF(iter);
+ }
+ // Advance and Recede, including requests that overshoot the string.
+ {
+ StringIterator *iter = Str_Top(string);
+
+ StrIter_Next(iter);
+ TEST_INT_EQ(runner, StrIter_Advance(iter, 2), 2,
+ "Advance returns number of code points");
+ TEST_INT_EQ(runner, StrIter_Next(iter), code_points[3],
+ "Advance works");
+ TEST_INT_EQ(runner,
+ StrIter_Advance(iter, 1000000), num_code_points - 4,
+ "Advance past end of string");
+
+ StrIter_Prev(iter);
+ TEST_INT_EQ(runner, StrIter_Recede(iter, 2), 2,
+ "Recede returns number of code points");
+ TEST_INT_EQ(runner, StrIter_Prev(iter), code_points[num_code_points-4],
+ "Recede works");
+ TEST_INT_EQ(runner, StrIter_Recede(iter, 1000000), num_code_points - 4,
+ "Recede past start of string");
+
+ DECREF(iter);
+ }
+
+ DECREF(string);
+ DECREF(buf);
+}
+
+static void
+test_iterator_whitespace(TestBatchRunner *runner) {
+ int num_spaces;
+ String *ws_smiley = S_smiley_with_whitespace(&num_spaces);
+
+ {
+ StringIterator *iter = Str_Top(ws_smiley);
+ TEST_INT_EQ(runner, StrIter_Skip_Next_Whitespace(iter), num_spaces,
+ "Skip_Next_Whitespace");
+ TEST_INT_EQ(runner, StrIter_Skip_Next_Whitespace(iter), 0,
+ "Skip_Next_Whitespace without whitespace");
+ DECREF(iter);
+ }
+
+ {
+ StringIterator *iter = Str_Tail(ws_smiley);
+ TEST_INT_EQ(runner, StrIter_Skip_Prev_Whitespace(iter), num_spaces,
+ "Skip_Prev_Whitespace");
+ TEST_INT_EQ(runner, StrIter_Skip_Prev_Whitespace(iter), 0,
+ "Skip_Prev_Whitespace without whitespace");
+ DECREF(iter);
+ }
+
+ DECREF(ws_smiley);
+}
+
+static void
+test_iterator_substring(TestBatchRunner *runner) {
+ String *string = Str_newf("a%sb%sc%sd", smiley, smiley, smiley);
+
+ StringIterator *start = Str_Top(string);
+ StringIterator *end = Str_Tail(string);
+
+ {
+ String *substring = StrIter_substring(start, end);
+ TEST_TRUE(runner, Str_Equals(substring, (Obj*)string),
+ "StrIter_substring whole string");
+ DECREF(substring);
+ }
+
+ StrIter_Advance(start, 2);
+ StrIter_Recede(end, 2);
+
+ {
+ String *substring = StrIter_substring(start, end);
+ String *wanted = Str_newf("b%sc", smiley);
+ TEST_TRUE(runner, Str_Equals(substring, (Obj*)wanted),
+ "StrIter_substring");
+
+ TEST_TRUE(runner, StrIter_Starts_With(start, wanted), "Starts_With");
+
+ DECREF(wanted);
+ DECREF(substring);
+ }
+
+ DECREF(start);
+ DECREF(end);
+ DECREF(string);
+}
void
TestStr_Run_IMP(TestString *self, TestBatchRunner *runner) {
- TestBatchRunner_Plan(runner, (TestBatch*)self, 40);
+ TestBatchRunner_Plan(runner, (TestBatch*)self, 96);
test_Cat(runner);
test_Mimic_and_Clone(runner);
test_Code_Point_At_and_From(runner);
@@ -274,6 +453,9 @@ TestStr_Run_IMP(TestString *self, TestBatchRunner *runner) {
test_Trim(runner);
test_To_F64(runner);
test_To_I64(runner);
+ test_iterator(runner);
+ test_iterator_whitespace(runner);
+ test_iterator_substring(runner);
}