You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by nw...@apache.org on 2013/09/06 00:11:21 UTC

[lucy-commits] [2/4] git commit: refs/heads/cfish-string-prep1 - Initial implementation of StringIterator

Initial implementation of StringIterator


Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/f7edbe56
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/f7edbe56
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/f7edbe56

Branch: refs/heads/cfish-string-prep1
Commit: f7edbe56b38789de10c6ed34faec7836919ad8c5
Parents: cfea9e6
Author: Nick Wellnhofer <we...@aevum.de>
Authored: Wed Sep 4 22:42:00 2013 +0200
Committer: Nick Wellnhofer <we...@aevum.de>
Committed: Wed Sep 4 22:48:21 2013 +0200

----------------------------------------------------------------------
 clownfish/runtime/core/Clownfish/String.c       | 314 +++++++++++++++++++
 clownfish/runtime/core/Clownfish/String.cfh     | 121 +++++++
 .../runtime/core/Clownfish/Test/TestString.c    | 220 +++++++++++--
 3 files changed, 636 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy/blob/f7edbe56/clownfish/runtime/core/Clownfish/String.c
----------------------------------------------------------------------
diff --git a/clownfish/runtime/core/Clownfish/String.c b/clownfish/runtime/core/Clownfish/String.c
index 07da5d8..10dac38 100644
--- a/clownfish/runtime/core/Clownfish/String.c
+++ b/clownfish/runtime/core/Clownfish/String.c
@@ -17,6 +17,8 @@
 #define C_CFISH_STRING
 #define C_CFISH_VIEWCHARBUF
 #define C_CFISH_STACKSTRING
+#define C_CFISH_STRINGITERATOR
+#define C_CFISH_STACKSTRINGITERATOR
 #define CFISH_USE_SHORT_NAMES
 #define CHY_USE_SHORT_NAMES
 
@@ -35,6 +37,11 @@
 #include "Clownfish/Util/Memory.h"
 #include "Clownfish/Util/StringHelper.h"
 
+#define STR_STACKTOP(string) \
+    Str_StackTop(string, alloca(sizeof(StackStringIterator)))
+#define STR_STACKTAIL(string) \
+    Str_StackTail(string, alloca(sizeof(StackStringIterator)))
+
 // Helper function for throwing invalid UTF-8 error. Since THROW uses
 // a String internally, calling THROW with invalid UTF-8 would create an
 // infinite loop -- so we fwrite some of the bogus text to stderr and
@@ -577,6 +584,26 @@ Str_Get_Ptr8_IMP(String *self) {
     return (uint8_t*)self->ptr;
 }
 
+StringIterator*
+Str_Top_IMP(String *self) {
+    return StrIter_new(self, 0);
+}
+
+StringIterator*
+Str_Tail_IMP(String *self) {
+    return StrIter_new(self, self->size);
+}
+
+StackStringIterator*
+Str_StackTop_IMP(String *self, void *allocation) {
+    return SStrIter_new(allocation, self, 0);
+}
+
+StackStringIterator*
+Str_StackTail_IMP(String *self, void *allocation) {
+    return SStrIter_new(allocation, self, self->size);
+}
+
 /*****************************************************************/
 
 ViewCharBuf*
@@ -765,4 +792,291 @@ SStr_Destroy_IMP(StackString *self) {
     THROW(ERR, "Can't destroy a StackString ('%o')", self);
 }
 
+/*****************************************************************/
+
+StringIterator*
+StrIter_new(String *string, size_t byte_offset) {
+    StringIterator *self = (StringIterator*)VTable_Make_Obj(STRINGITERATOR);
+    self->string      = (String*)INCREF(string);
+    self->byte_offset = byte_offset;
+    return self;
+}
+
+String*
+StrIter_substring(StringIterator *top, StringIterator *tail) {
+    String *string = top->string;
+
+    if (string != tail->string) {
+        THROW(ERR, "StrIter_substring: strings don't match");
+    }
+    if (top->byte_offset > tail->byte_offset) {
+        THROW(ERR, "StrIter_substring: top is behind tail");
+    }
+    if (tail->byte_offset > string->size) {
+        THROW(ERR, "Invalid StringIterator offset");
+    }
+
+    return Str_new_from_trusted_utf8(string->ptr + top->byte_offset,
+                                     tail->byte_offset - top->byte_offset);
+}
+
+Obj*
+StrIter_Clone_IMP(StringIterator *self) {
+    return (Obj*)StrIter_new(self->string, self->byte_offset);
+}
+
+void
+StrIter_Assign_IMP(StringIterator *self, StringIterator *other) {
+    if (self->string != other->string) {
+        DECREF(self->string);
+        self->string = (String*)INCREF(other->string);
+    }
+    self->byte_offset = other->byte_offset;
+}
+
+bool
+StrIter_Equals_IMP(StringIterator *self, Obj *other) {
+    StringIterator *const twin = (StringIterator*)other;
+    if (twin == self)                     { return true; }
+    if (!Obj_Is_A(other, STRINGITERATOR)) { return false; }
+    return self->string == twin->string
+           && self->byte_offset == twin->byte_offset;
+}
+
+int32_t
+StrIter_Compare_To_IMP(StringIterator *self, Obj *other) {
+    StringIterator *twin = (StringIterator*)CERTIFY(other, STRINGITERATOR);
+    if (self->string != twin->string) {
+        THROW(ERR, "Can't compare iterators of different strings");
+    }
+    if (self->byte_offset < twin->byte_offset) { return -1; }
+    if (self->byte_offset > twin->byte_offset) { return 1; }
+    return 0;
+}
+
+bool
+StrIter_Has_Next_IMP(StringIterator *self) {
+    return self->byte_offset < self->string->size;
+}
+
+bool
+StrIter_Has_Prev_IMP(StringIterator *self) {
+    return self->byte_offset != 0;
+}
+
+uint32_t
+StrIter_Next_IMP(StringIterator *self) {
+    String *string      = self->string;
+    size_t  byte_offset = self->byte_offset;
+    size_t  size        = string->size;
+
+    if (byte_offset >= size) { return STRITER_DONE; }
+
+    const uint8_t *const ptr = (const uint8_t*)string->ptr;
+    uint32_t retval = ptr[byte_offset++];
+
+    if (retval >= 0x80) {
+        /*
+         * The 'mask' bit is tricky. In each iteration, 'retval' is
+         * left-shifted by 6 and 'mask' by 5 bits. So relative to the first
+         * byte of the sequence, 'mask' moves one bit to the right.
+         *
+         * The possible outcomes after the loop are:
+         *
+         * Two byte sequence
+         * retval: 110aaaaa bbbbbb
+         * mask:   00100000 000000
+         *
+         * Three byte sequence
+         * retval: 1110aaaa bbbbbb cccccc
+         * mask:   00010000 000000 000000
+         *
+         * Four byte sequence
+         * retval: 11110aaa bbbbbb cccccc dddddd
+         * mask:   00001000 000000 000000 000000
+         *
+         * This also illustrates why the exit condition (retval & mask)
+         * works. After the first iteration, the third most significant bit
+         * is tested. After the second iteration, the fourth, and so on.
+         */
+
+        uint32_t mask = 1 << 6;
+
+        do {
+            if (byte_offset >= size) {
+                THROW(ERR, "StrIter_Next: Invalid UTF-8");
+            }
+
+            retval = (retval << 6) | (ptr[byte_offset++] & 0x3F);
+            mask <<= 5;
+        } while (retval & mask);
+
+        retval &= mask - 1;
+    }
+
+    self->byte_offset = byte_offset;
+    return retval;
+}
+
+uint32_t
+StrIter_Prev_IMP(StringIterator *self) {
+    size_t byte_offset = self->byte_offset;
+
+    if (byte_offset == 0) { return STRITER_DONE; }
+
+    const uint8_t *const ptr = (const uint8_t*)self->string->ptr;
+    uint32_t retval = ptr[--byte_offset];
+
+    if (retval >= 0x80) {
+        // Construct the result from right to left.
+
+        if (byte_offset == 0) {
+            THROW(ERR, "StrIter_Prev: Invalid UTF-8");
+        }
+
+        retval &= 0x3F;
+        int shift = 6;
+        uint32_t first_byte_mask = 0x1F;
+        uint32_t byte = ptr[--byte_offset];
+
+        while ((byte & 0xC0) == 0x80) {
+            if (byte_offset == 0) {
+                THROW(ERR, "StrIter_Prev: Invalid UTF-8");
+            }
+
+            retval |= (byte & 0x3F) << shift;
+            shift += 6;
+            first_byte_mask >>= 1;
+            byte = ptr[--byte_offset];
+        }
+
+        retval |= (byte & first_byte_mask) << shift;
+    }
+
+    self->byte_offset = byte_offset;
+    return retval;
+}
+
+size_t
+StrIter_Advance_IMP(StringIterator *self, size_t num) {
+    size_t num_skipped = 0;
+    size_t byte_offset = self->byte_offset;
+    size_t size        = self->string->size;
+    const uint8_t *const ptr = (const uint8_t*)self->string->ptr;
+
+    while (num_skipped < num) {
+        if (byte_offset >= size) {
+            break;
+        }
+        uint8_t first_byte = ptr[byte_offset];
+        byte_offset += StrHelp_UTF8_COUNT[first_byte];
+        ++num_skipped;
+    }
+
+    if (byte_offset > size) {
+        THROW(ERR, "StrIter_Advance: Invalid UTF-8");
+    }
+
+    self->byte_offset = byte_offset;
+    return num_skipped;
+}
+
+size_t
+StrIter_Recede_IMP(StringIterator *self, size_t num) {
+    size_t num_skipped = 0;
+    size_t byte_offset = self->byte_offset;
+    const uint8_t *const ptr = (const uint8_t*)self->string->ptr;
+
+    while (num_skipped < num) {
+        if (byte_offset == 0) {
+            break;
+        }
+
+        uint8_t byte;
+        do {
+            if (byte_offset == 0) {
+                THROW(ERR, "StrIter_Recede: Invalid UTF-8");
+            }
+
+            byte = ptr[--byte_offset];
+        } while ((byte & 0xC0) == 0x80);
+        ++num_skipped;
+    }
+
+    self->byte_offset = byte_offset;
+    return num_skipped;
+}
+
+size_t
+StrIter_Skip_Next_Whitespace_IMP(StringIterator *self) {
+    size_t   num_skipped = 0;
+    size_t   byte_offset = self->byte_offset;
+    uint32_t code_point;
+
+    while (STRITER_DONE != (code_point = StrIter_Next(self))) {
+        if (!StrHelp_is_whitespace(code_point)) { break; }
+        byte_offset = self->byte_offset;
+        ++num_skipped;
+    }
+
+    self->byte_offset = byte_offset;
+    return num_skipped;
+}
+
+size_t
+StrIter_Skip_Prev_Whitespace_IMP(StringIterator *self) {
+    size_t   num_skipped = 0;
+    size_t   byte_offset = self->byte_offset;
+    uint32_t code_point;
+
+    while (STRITER_DONE != (code_point = StrIter_Prev(self))) {
+        if (!StrHelp_is_whitespace(code_point)) { break; }
+        byte_offset = self->byte_offset;
+        ++num_skipped;
+    }
+
+    self->byte_offset = byte_offset;
+    return num_skipped;
+}
+
+bool
+StrIter_Starts_With_IMP(StringIterator *self, String *prefix) {
+    String *string      = self->string;
+    size_t  byte_offset = self->byte_offset;
+
+    if (byte_offset > string->size) {
+        THROW(ERR, "Invalid StringIterator offset");
+    }
+
+    if (string->size - byte_offset < prefix->size) { return false; }
+
+    const char *ptr = string->ptr + byte_offset;
+    return memcmp(ptr, prefix->ptr, prefix->size) == 0;
+}
+
+void
+StrIter_Destroy_IMP(StringIterator *self) {
+    DECREF(self->string);
+    SUPER_DESTROY(self, STRINGITERATOR);
+}
+
+/*****************************************************************/
+
+StackStringIterator*
+SStrIter_new(void *allocation, String *string, size_t byte_offset) {
+    StackStringIterator *self
+        = (StackStringIterator*)VTable_Init_Obj(STACKSTRINGITERATOR,
+                                                allocation);
+    // Assume that the string will be available for the lifetime of the
+    // iterator and don't increase its refcount.
+    self->string      = string;
+    self->byte_offset = byte_offset;
+    return self;
+}
+
+void
+SStrIter_Destroy_IMP(StackStringIterator *self) {
+    THROW(ERR, "Can't destroy a StackStringIterator");
+}
+
 

http://git-wip-us.apache.org/repos/asf/lucy/blob/f7edbe56/clownfish/runtime/core/Clownfish/String.cfh
----------------------------------------------------------------------
diff --git a/clownfish/runtime/core/Clownfish/String.cfh b/clownfish/runtime/core/Clownfish/String.cfh
index f56b26c..d6d378a 100644
--- a/clownfish/runtime/core/Clownfish/String.cfh
+++ b/clownfish/runtime/core/Clownfish/String.cfh
@@ -270,6 +270,26 @@ class Clownfish::String cnick Str
      */
     incremented String*
     SubString(String *self, size_t offset, size_t len);
+
+    /** Return an iterator to the start of the string.
+     */
+    incremented StringIterator*
+    Top(String *self);
+
+    /** Return an iterator to the end of the string.
+     */
+    incremented StringIterator*
+    Tail(String *self);
+
+    /** Return a stack iterator to the start of the string.
+     */
+    incremented StackStringIterator*
+    StackTop(String *self, void *allocation);
+
+    /** Return a stack iterator to the end of the string.
+     */
+    incremented StackStringIterator*
+    StackTail(String *self, void *allocation);
 }
 
 class Clownfish::ViewCharBuf cnick ViewCB
@@ -356,6 +376,104 @@ class Clownfish::StackString cnick SStr
     Destroy(StackString *self);
 }
 
+class Clownfish::StringIterator cnick StrIter
+    inherits Clownfish::Obj {
+
+    String *string;
+    size_t  byte_offset;
+
+    inert incremented StringIterator*
+    new(String *string, size_t byte_offset);
+
+    /** Return the substring between the top and tail iterators.
+     * @param top Iterator at the start of the substring.
+     * @param tail Iterator at the end; must belong to the same string as top.
+     */
+    inert incremented String*
+    substring(StringIterator *top, StringIterator *tail);
+
+    public incremented Obj*
+    Clone(StringIterator *self);
+
+    public void
+    Assign(StringIterator *self, StringIterator *other);
+
+    public bool
+    Equals(StringIterator *self, Obj *other);
+
+    public int32_t
+    Compare_To(StringIterator *self, Obj *other);
+
+    /** Return true if the iterator is not at the end of the string.
+     */
+    public bool
+    Has_Next(StringIterator *self);
+
+    /** Return true if the iterator is not at the start of the string.
+     */
+    public bool
+    Has_Prev(StringIterator *self);
+
+    /** Return the code point after the current position and advance the
+     * iterator. Return CFISH_STRITER_DONE at the end of the string.
+     */
+    public uint32_t
+    Next(StringIterator *self);
+
+    /** Return the code point before the current position and go one step back.
+     * Return CFISH_STRITER_DONE at the start of the string.
+     */
+    public uint32_t
+    Prev(StringIterator *self);
+
+    /** Skip code points.
+     * @param num The number of code points to skip.
+     * @return the number of code points actually skipped. This can be less
+     * than the requested number if the end of the string is reached.
+     */
+    public size_t
+    Advance(StringIterator *self, size_t num);
+
+    /** Skip code points backward.
+     * @param num The number of code points to skip.
+     * @return the number of code points actually skipped. This can be less
+     * than the requested number if the start of the string is reached.
+     */
+    public size_t
+    Recede(StringIterator *self, size_t num);
+
+    /** Skip whitespace.
+     * @return the number of code points skipped.
+     */
+    public size_t
+    Skip_Next_Whitespace(StringIterator *self);
+
+    /** Skip whitespace backward.
+     * @return the number of code points skipped.
+     */
+    public size_t
+    Skip_Prev_Whitespace(StringIterator *self);
+
+    /** Test whether the content after the iterator starts with
+     * <code>prefix</code>.
+     */
+    bool
+    Starts_With(StringIterator *self, String *prefix);
+
+    public void
+    Destroy(StringIterator *self);
+}
+
+class Clownfish::StackStringIterator cnick SStrIter
+    inherits Clownfish::StringIterator {
+
+    inert incremented StackStringIterator*
+    new(void *allocation, String *string, size_t byte_offset);
+
+    public void
+    Destroy(StackStringIterator *self);
+}
+
 __C__
 
 #define CFISH_SStr_BLANK() cfish_SStr_new(cfish_alloca(cfish_SStr_size()))
@@ -366,10 +484,13 @@ __C__
 #define CFISH_SStr_WRAP_STR(ptr, size) \
     cfish_SStr_wrap_str(cfish_alloca(cfish_SStr_size()), ptr, size)
 
+#define CFISH_STRITER_DONE  UINT32_MAX
+
 #ifdef CFISH_USE_SHORT_NAMES
   #define SStr_BLANK             CFISH_SStr_BLANK
   #define SSTR_WRAP              CFISH_SStr_WRAP
   #define SSTR_WRAP_STR          CFISH_SStr_WRAP_STR
+  #define STRITER_DONE          CFISH_STRITER_DONE
 #endif
 __END_C__
 

http://git-wip-us.apache.org/repos/asf/lucy/blob/f7edbe56/clownfish/runtime/core/Clownfish/Test/TestString.c
----------------------------------------------------------------------
diff --git a/clownfish/runtime/core/Clownfish/Test/TestString.c b/clownfish/runtime/core/Clownfish/Test/TestString.c
index 5f8337c..46bc62d 100644
--- a/clownfish/runtime/core/Clownfish/Test/TestString.c
+++ b/clownfish/runtime/core/Clownfish/Test/TestString.c
@@ -46,6 +46,30 @@ S_get_str(const char *string) {
     return Str_new_from_utf8(string, strlen(string));
 }
 
+// Surround a smiley with lots of whitespace. If `num_spaces_ptr` is not NULL,
+// store the number of code points in each whitespace run there.
+static String*
+S_smiley_with_whitespace(int *num_spaces_ptr) {
+    uint32_t spaces[] = {
+        ' ',    '\t',   '\r',   '\n',   0x000B, 0x000C, 0x000D, 0x0085,
+        0x00A0, 0x1680, 0x180E, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004,
+        0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x2028, 0x2029,
+        0x202F, 0x205F, 0x3000
+    };
+    int num_spaces = (int)(sizeof(spaces) / sizeof(spaces[0]));
+
+    CharBuf *buf = CB_new(0);
+    for (int i = 0; i < num_spaces; i++) { CB_Cat_Char(buf, spaces[i]); }
+    CB_Cat_Char(buf, 0x263A);  // The smiley itself.
+    for (int i = 0; i < num_spaces; i++) { CB_Cat_Char(buf, spaces[i]); }
+
+    String *retval = CB_To_String(buf);
+    if (num_spaces_ptr) { *num_spaces_ptr = num_spaces; }
+
+    DECREF(buf);
+    return retval;
+}
+
 static void
 test_Cat(TestBatchRunner *runner) {
     String *wanted = Str_newf("a%s", smiley);
@@ -185,23 +209,9 @@ test_Truncate(TestBatchRunner *runner) {
 
 static void
 test_Trim(TestBatchRunner *runner) {
-    uint32_t spaces[] = {
-        ' ',    '\t',   '\r',   '\n',   0x000B, 0x000C, 0x000D, 0x0085,
-        0x00A0, 0x1680, 0x180E, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004,
-        0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x2028, 0x2029,
-        0x202F, 0x205F, 0x3000
-    };
-    uint32_t num_spaces = sizeof(spaces) / sizeof(uint32_t);
-    uint32_t i;
     String *got;
 
-    // Surround a smiley with lots of whitespace.
-    CharBuf *buf = CB_new(0);
-    for (i = 0; i < num_spaces; i++) { CB_Cat_Char(buf, spaces[i]); }
-    CB_Cat_Char(buf, 0x263A);
-    for (i = 0; i < num_spaces; i++) { CB_Cat_Char(buf, spaces[i]); }
-
-    got = CB_To_String(buf);
+    got = S_smiley_with_whitespace(NULL);
     TEST_TRUE(runner, Str_Trim_Top(got), "Trim_Top returns true on success");
     TEST_FALSE(runner, Str_Trim_Top(got),
                "Trim_Top returns false on failure");
@@ -212,14 +222,12 @@ test_Trim(TestBatchRunner *runner) {
               "Trim_Top and Trim_Tail worked");
     DECREF(got);
 
-    got = CB_To_String(buf);
+    got = S_smiley_with_whitespace(NULL);
     TEST_TRUE(runner, Str_Trim(got), "Trim returns true on success");
     TEST_FALSE(runner, Str_Trim(got), "Trim returns false on failure");
     TEST_TRUE(runner, Str_Equals_Str(got, smiley, smiley_len),
               "Trim worked");
     DECREF(got);
-
-    DECREF(buf);
 }
 
 static void
@@ -260,10 +268,181 @@ test_To_I64(TestBatchRunner *runner) {
     DECREF(string);
 }
 
+// Test StringIterator over code points of every UTF-8 sequence length.
+static void
+test_iterator(TestBatchRunner *runner) {
+    static const uint32_t code_points[] = {
+        0x41,       // ASCII
+        0x7F,       // largest 1-byte sequence
+        0x80,       // smallest 2-byte sequence
+        0x7FF,      // largest 2-byte sequence
+        0x800,      // smallest 3-byte sequence
+        0xFFFF,     // largest 3-byte sequence
+        0x10000,    // smallest 4-byte sequence
+        0x10FFFF    // largest 4-byte sequence
+    };
+    static const int num_code_points
+        = (int)(sizeof(code_points) / sizeof(code_points[0]));
+    CharBuf *buf = CB_new(0);
+    for (int i = 0; i < num_code_points; ++i) {
+        CB_Cat_Char(buf, code_points[i]);
+    }
+    String *string = CB_To_String(buf);
+
+    {   // Compare_To, Equals, Clone and Assign.
+        StringIterator *top  = Str_Top(string);
+        StringIterator *tail = Str_Tail(string);
+
+        TEST_INT_EQ(runner, StrIter_Compare_To(top, (Obj*)tail), -1,
+                    "Compare_To top < tail");
+        TEST_INT_EQ(runner, StrIter_Compare_To(tail, (Obj*)top), 1,
+                    "Compare_To tail > top");
+        TEST_INT_EQ(runner, StrIter_Compare_To(top, (Obj*)top), 0,
+                    "Compare_To top == top");
+
+        StringIterator *clone = (StringIterator*)StrIter_Clone(top);
+        TEST_TRUE(runner, StrIter_Equals(clone, (Obj*)top), "Clone");
+
+        StrIter_Assign(clone, tail);
+        TEST_TRUE(runner, StrIter_Equals(clone, (Obj*)tail), "Assign");
+
+        DECREF(clone);
+        DECREF(top);
+        DECREF(tail);
+    }
+
+    {   // Forward iteration from the top with Has_Next/Next.
+        StringIterator *iter = Str_Top(string);
+
+        for (int i = 0; i < num_code_points; ++i) {
+            TEST_TRUE(runner, StrIter_Has_Next(iter), "Has_Next %d", i);
+            uint32_t code_point = StrIter_Next(iter);
+            TEST_INT_EQ(runner, code_point, code_points[i], "Next %d", i);
+        }
+
+        TEST_TRUE(runner, !StrIter_Has_Next(iter),
+                  "Has_Next at end of string");
+        TEST_INT_EQ(runner, StrIter_Next(iter), STRITER_DONE,
+                    "Next at end of string");
+
+        StringIterator *tail = Str_Tail(string);
+        TEST_TRUE(runner, StrIter_Equals(iter, (Obj*)tail), "Equals tail");
+
+        DECREF(tail);
+        DECREF(iter);
+    }
+
+    {   // Backward iteration from the tail with Has_Prev/Prev.
+        StringIterator *iter = Str_Tail(string);
+
+        for (int i = num_code_points - 1; i >= 0; --i) {
+            TEST_TRUE(runner, StrIter_Has_Prev(iter), "Has_Prev %d", i);
+            uint32_t code_point = StrIter_Prev(iter);
+            TEST_INT_EQ(runner, code_point, code_points[i], "Prev %d", i);
+        }
+
+        TEST_TRUE(runner, !StrIter_Has_Prev(iter),
+                  "Has_Prev at start of string");
+        TEST_INT_EQ(runner, StrIter_Prev(iter), STRITER_DONE,
+                    "Prev at start of string");
+
+        StringIterator *top = Str_Top(string);
+        TEST_TRUE(runner, StrIter_Equals(iter, (Obj*)top), "Equals top");
+
+        DECREF(top);
+        DECREF(iter);
+    }
+
+    {   // Advance and Recede, including requests that overshoot the string.
+        StringIterator *iter = Str_Top(string);
+
+        StrIter_Next(iter);
+        TEST_INT_EQ(runner, StrIter_Advance(iter, 2), 2,
+                    "Advance returns number of code points");
+        TEST_INT_EQ(runner, StrIter_Next(iter), code_points[3],
+                    "Advance works");
+        TEST_INT_EQ(runner,
+                    StrIter_Advance(iter, 1000000), num_code_points - 4,
+                    "Advance past end of string");
+
+        StrIter_Prev(iter);
+        TEST_INT_EQ(runner, StrIter_Recede(iter, 2), 2,
+                    "Recede returns number of code points");
+        TEST_INT_EQ(runner, StrIter_Prev(iter), code_points[num_code_points-4],
+                    "Recede works");
+        TEST_INT_EQ(runner, StrIter_Recede(iter, 1000000), num_code_points - 4,
+                    "Recede past start of string");
+
+        DECREF(iter);
+    }
+
+    DECREF(string);
+    DECREF(buf);
+}
+
+static void
+test_iterator_whitespace(TestBatchRunner *runner) {
+    int num_spaces;
+    String *ws_smiley = S_smiley_with_whitespace(&num_spaces);
+
+    {
+        StringIterator *iter = Str_Top(ws_smiley);
+        TEST_INT_EQ(runner, StrIter_Skip_Next_Whitespace(iter), num_spaces,
+                    "Skip_Next_Whitespace");
+        TEST_INT_EQ(runner, StrIter_Skip_Next_Whitespace(iter), 0,
+                    "Skip_Next_Whitespace without whitespace");
+        DECREF(iter);
+    }
+
+    {
+        StringIterator *iter = Str_Tail(ws_smiley);
+        TEST_INT_EQ(runner, StrIter_Skip_Prev_Whitespace(iter), num_spaces,
+                    "Skip_Prev_Whitespace");
+        TEST_INT_EQ(runner, StrIter_Skip_Prev_Whitespace(iter), 0,
+                    "Skip_Prev_Whitespace without whitespace");
+        DECREF(iter);
+    }
+
+    DECREF(ws_smiley);
+}
+
+static void
+test_iterator_substring(TestBatchRunner *runner) {
+    String *string = Str_newf("a%sb%sc%sd", smiley, smiley, smiley);
+
+    StringIterator *start = Str_Top(string);
+    StringIterator *end = Str_Tail(string);
+
+    {
+        String *substring = StrIter_substring(start, end);
+        TEST_TRUE(runner, Str_Equals(substring, (Obj*)string),
+                  "StrIter_substring whole string");
+        DECREF(substring);
+    }
+
+    StrIter_Advance(start, 2);
+    StrIter_Recede(end, 2);
+
+    {
+        String *substring = StrIter_substring(start, end);
+        String *wanted = Str_newf("b%sc", smiley);
+        TEST_TRUE(runner, Str_Equals(substring, (Obj*)wanted),
+                  "StrIter_substring");
+
+        TEST_TRUE(runner, StrIter_Starts_With(start, wanted), "Starts_With");
+
+        DECREF(wanted);
+        DECREF(substring);
+    }
+
+    DECREF(start);
+    DECREF(end);
+    DECREF(string);
+}
 
 void
 TestStr_Run_IMP(TestString *self, TestBatchRunner *runner) {
-    TestBatchRunner_Plan(runner, (TestBatch*)self, 40);
+    TestBatchRunner_Plan(runner, (TestBatch*)self, 96);
     test_Cat(runner);
     test_Mimic_and_Clone(runner);
     test_Code_Point_At_and_From(runner);
@@ -274,6 +453,9 @@ TestStr_Run_IMP(TestString *self, TestBatchRunner *runner) {
     test_Trim(runner);
     test_To_F64(runner);
     test_To_I64(runner);
+    test_iterator(runner);
+    test_iterator_whitespace(runner);
+    test_iterator_substring(runner);
 }