You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by mt...@apache.org on 2009/08/28 19:41:29 UTC

svn commit: r808967 - in /commons/sandbox/runtime/trunk/src/main/native: shared/string.c test/testsuite.c

Author: mturk
Date: Fri Aug 28 17:41:29 2009
New Revision: 808967

URL: http://svn.apache.org/viewvc?rev=808967&view=rev
Log:
Fix token counters

Modified:
    commons/sandbox/runtime/trunk/src/main/native/shared/string.c
    commons/sandbox/runtime/trunk/src/main/native/test/testsuite.c

Modified: commons/sandbox/runtime/trunk/src/main/native/shared/string.c
URL: http://svn.apache.org/viewvc/commons/sandbox/runtime/trunk/src/main/native/shared/string.c?rev=808967&r1=808966&r2=808967&view=diff
==============================================================================
--- commons/sandbox/runtime/trunk/src/main/native/shared/string.c (original)
+++ commons/sandbox/runtime/trunk/src/main/native/shared/string.c Fri Aug 28 17:41:29 2009
@@ -944,6 +944,22 @@
     return NULL;
 }
 
+static char *strspc_q(const char *s1)
+{
+    int s0;
+
+    /* Some early sanity check */
+    if (!s1 || !*s1)
+        return NULL;
+    while ((s0 = *s1++) != 0) {
+        if (acr_isspace(s0))
+            return (char *)(s1 - 1);
+        if (s0 == '\\' && *s1)
+            s1++; /* We have something escaped. Advance */
+    }
+    return NULL;
+}
+
 static wchar_t *wcschr_q(const wchar_t *s1, int ch)
 {
     int s0;
@@ -960,6 +976,22 @@
     return NULL;
 }
 
+static wchar_t *wcsspc_q(const wchar_t *s1)
+{
+    int s0;
+
+    /* Some early sanity check */
+    if (!s1 || !*s1)
+        return NULL;
+    while ((s0 = *s1++) != 0) {
+        if (iswspace(s0))
+            return (wchar_t *)(s1 - 1);
+        if (s0 == L'\\' && *s1)
+            s1++; /* We have something escaped. Advance */
+    }
+    return NULL;
+}
+
 ACR_DECLARE(char *) ACR_strctok(char *str, int sep, char **last)
 {
     char *tok;
@@ -1028,6 +1060,7 @@
 
 ACR_DECLARE(char *) ACR_strqtok(char *str, char **last)
 {
+    int   chr;
     char *tok;
 
     if (!str)                           /* subsequent call */
@@ -1037,11 +1070,10 @@
     while (*str && acr_isspace(*str))   /* skip leading delimiters */
          str++;
     if (*str == '"') {
-        int ch;
         /* Advance to the first unescaped quote */
         tok = str + 1;
-        while ((ch = *tok++) != 0) {
-            if (ch == '"') {
+        while ((chr = *tok++) != 0) {
+            if (chr == '"') {
                 if (*tok) {
                     *tok++ = '\0';
                     *last  = tok;
@@ -1050,7 +1082,7 @@
                     *last  = NULL;
                 return str;
             }
-            if (ch == '\\' && *tok)
+            if (chr == '\\' && *tok)
                 tok++;
         }
         /* Unterminated quote */
@@ -1058,7 +1090,7 @@
         /* Check for last empty token */
         return *str ? str : NULL;
     }
-    if ((tok = strpbrk(str, " \t"))) {
+    if ((tok = strspc_q(str))) {
         *tok++ = '\0';
         *last  = tok;
         return str;
@@ -1070,81 +1102,102 @@
     }
 }
 
-ACR_DECLARE(int) ACR_strnctok(const char *str, int sep)
+static const char *_strcqctok(const char *str, int sep, const char **last)
 {
-    int cnt = 1;
+    const char *tok;
 
-    while (*str && *str == sep)          /* skip leading delimiters */
-        str++;
-    while (*str) {
-        if (*str == sep) {
-            while (*str == sep)
-                str++;
-            if (*str)
-                cnt++;
+    if (!str)                       /* subsequent call */
+        str = *last;                /* start where we left off */
+    if (!str)                       /* no more tokens */
+        return NULL;
+    while (*str == sep)             /* skip leading delimiters */
+         str++;
+    if (*str == '"') {
+        int ch;
+        /* Advance to the first unescaped quote */
+        tok = str + 1;
+        while ((ch = *tok++) != 0) {
+            if (ch == '"') {
+                if (*tok) {
+                    *last = ++tok;
+                }
+                else
+                    *last  = NULL;
+                return str;
+            }
+            if (ch == '\\' && *tok)
+                tok++;
         }
-        else
-            str++;
+        /* Unterminated quote */
+        *last = NULL;
+        /* Check for last empty token */
+        return *str ? str : NULL;
+    }
+    if ((tok = strchr_q(str, sep))) {
+        *last = ++tok;
+        return str;
+    }
+    else {
+        *last = NULL;
+        /* Check for last empty token */
+        return *str ? str : NULL;
     }
-    return cnt;
 }
 
-ACR_DECLARE(int) ACR_strnqctok(const char *str, int sep)
+/* Same as ACR_strqtok, but doesn't modify the string.
+ * Used internally for token counting.
+ */
+static const char *_strcqtok(const char *str, const char **last)
 {
-    int cnt = 1;
+    int   chr;
+    const char *tok;
 
-    while (*str == sep)          /* skip leading delimiters */
-        str++;
-    while (*str) {
-        if (*str == '"') {
-            int ch;
-            str++;
-            /* Advance to the first unescaped quote */
-            while ((ch = *str++) != 0) {
-                if (ch == '"') {
-                    cnt++;
-                    break;
+    if (!str)                           /* subsequent call */
+        str = *last;                    /* start where we left off */
+    if (!str)                           /* no more tokens */
+        return NULL;
+    while (*str && acr_isspace(*str))   /* skip leading delimiters */
+         str++;
+    if (*str == '"') {
+        /* Advance to the first unescaped quote */
+        tok = str + 1;
+        while ((chr = *tok++) != 0) {
+            if (chr == '"') {
+                if (*tok) {
+                    *last  = ++tok;
                 }
-                if (ch == '\\' && *str)
-                    str++;
+                else
+                    *last  = NULL;
+                return str;
             }
-            continue;
-        }
-        if (*str == sep) {
-            while (*str == sep)
-                str++;
-            if (*str)
-                cnt++;
+            if (chr == '\\' && *tok)
+                tok++;
         }
-        else
-            str++;
+        /* Unterminated quote */
+        *last = NULL;
+        /* Check for last empty token */
+        return *str ? str : NULL;
+    }
+    if ((tok = strspc_q(str))) {
+        *last  = ++tok;
+        return str;
+    }
+    else {
+        *last = NULL;
+        /* Check for last empty token */
+        return *str ? str : NULL;
     }
-    return cnt;
 }
 
-ACR_DECLARE(int) ACR_strnqtok(const char *str)
+ACR_DECLARE(int) ACR_strnctok(const char *str, int sep)
 {
     int cnt = 1;
 
-    while (*str && acr_isspace(*str))          /* skip leading delimiters */
+    while (*str && *str == sep)          /* skip leading delimiters */
         str++;
     while (*str) {
-        if (*str == '"') {
-            int ch;
-            str++;
-            /* Advance to the first unescaped quote */
-            while ((ch = *str++) != 0) {
-                if (ch == '"') {
-                    cnt++;
-                    break;
-                }
-                if (ch == '\\' && *str)
-                    str++;
-            }
-            continue;
-        }
-        if (acr_isspace(*str)) {
-            while (acr_isspace(*str))
+        if (*str == sep) {
+            while (*str == sep)
                 str++;
             if (*str)
                 cnt++;
@@ -1155,21 +1208,30 @@
     return cnt;
 }
 
-ACR_DECLARE(int) ACR_wcsnctok(const wchar_t *str, int sep)
+ACR_DECLARE(int) ACR_strnqctok(const char *str, int sep)
 {
-    int cnt = 1;
+    const char *token;
+    const char *state = NULL;
+    int cnt = 0;
+
+    token = _strcqctok(str, sep, &state);
+    while (token) {
+        cnt++;
+        token = _strcqctok(NULL, sep, &state);
+    }
+    return cnt;
+}
 
-    while (*str && *str == (wchar_t)sep)    /* skip leading delimiters */
-        str++;
-    while (*str) {
-        if (*str == (wchar_t)sep) {
-            while (*str == (wchar_t)sep)
-                str++;
-            if (*str)
-                cnt++;
-        }
-        else
-            str++;
+ACR_DECLARE(int) ACR_strnqtok(const char *str)
+{
+    const char *token;
+    const char *state = NULL;
+    int cnt = 0;
+
+    token = _strcqtok(str, &state);
+    while (token) {
+        cnt++;
+        token = _strcqtok(NULL, &state);
     }
     return cnt;
 }
@@ -1196,39 +1258,6 @@
     }
 }
 
-ACR_DECLARE(int) ACR_wcsnqtok(const wchar_t *str)
-{
-    int cnt = 1;
-
-    while (*str && iswspace(*str))          /* skip leading delimiters */
-        str++;
-    while (*str) {
-        if (*str == L'"') {
-            wchar_t ch;
-            str++;
-            /* Advance to the first unescaped quote */
-            while ((ch = *str++) != 0) {
-                if (ch == L'"') {
-                    cnt++;
-                    break;
-                }
-                if (ch == L'\\' && *str)
-                    str++;
-            }
-            continue;
-        }
-        if (iswspace(*str)) {
-            while (iswspace(*str))
-                str++;
-            if (*str)
-                cnt++;
-        }
-        else
-            str++;
-    }
-    return cnt;
-}
-
 ACR_DECLARE(wchar_t *) ACR_wcsqctok(wchar_t *str, int sep, wchar_t **last)
 {
     wchar_t *tok;
@@ -1305,7 +1334,7 @@
         /* Check for last empty token */
         return *str ? str : NULL;
     }
-    if ((tok = wcspbrk(str, L" \t"))) {
+    if ((tok = wcsspc_q(str))) {
         *tok++ = L'\0';
         *last  = tok;
         return str;
@@ -1317,6 +1346,137 @@
     }
 }
 
+static const wchar_t *_wcscqctok(const wchar_t *str, int sep, const wchar_t **last)
+{
+    const wchar_t *tok;
+
+    if (!str)                       /* subsequent call */
+        str = *last;                /* start where we left off */
+    if (!str)                       /* no more tokens */
+        return NULL;
+    while (*str == sep)             /* skip leading delimiters */
+         str++;
+    if (*str == L'"') {
+        int ch;
+        /* Advance to the first unescaped quote */
+        tok = str + 1;
+        while ((ch = *tok++) != 0) {
+            if (ch == L'"') {
+                if (*tok) {
+                    *last = ++tok;
+                }
+                else
+                    *last  = NULL;
+                return str;
+            }
+            if (ch == L'\\' && *tok)
+                tok++;
+        }
+        /* Unterminated quote */
+        *last = NULL;
+        /* Check for last empty token */
+        return *str ? str : NULL;
+    }
+    if ((tok = wcschr_q(str, sep))) {
+        *last  = ++tok;
+        return str;
+    }
+    else {
+        *last = NULL;
+        /* Check for last empty token */
+        return *str ? str : NULL;
+    }
+}
+
+static const wchar_t *_wcscqtok(const wchar_t *str, const wchar_t **last)
+{
+    const wchar_t *tok;
+
+    if (!str)                           /* subsequent call */
+        str = *last;                    /* start where we left off */
+    if (!str)                           /* no more tokens */
+        return NULL;
+    while (*str && acr_isspace(*str))   /* skip leading delimiters */
+         str++;
+    if (*str == L'"') {
+        int ch;
+        /* Advance to the first unescaped quote */
+        tok = str + 1;
+        while ((ch = *tok++) != 0) {
+            if (ch == L'"') {
+                if (*tok) {
+                    *last  = ++tok;
+                }
+                else
+                    *last  = NULL;
+                return str;
+            }
+            if (ch == L'\\' && *tok)
+                tok++;
+        }
+        /* Unterminated quote */
+        *last = NULL;
+        /* Check for last empty token */
+        return *str ? str : NULL;
+    }
+    if ((tok = wcsspc_q(str))) {
+        *last  = ++tok;
+        return str;
+    }
+    else {
+        *last = NULL;
+        /* Check for last empty token */
+        return *str ? str : NULL;
+    }
+}
+
+ACR_DECLARE(int) ACR_wcsnctok(const wchar_t *str, int sep)
+{
+    int cnt = 1;
+
+    while (*str && *str == (wchar_t)sep)    /* skip leading delimiters */
+        str++;
+    while (*str) {
+        if (*str == (wchar_t)sep) {
+            while (*str == (wchar_t)sep)
+                str++;
+            if (*str)
+                cnt++;
+        }
+        else
+            str++;
+    }
+    return cnt;
+}
+
+ACR_DECLARE(int) ACR_wcsnqctok(const wchar_t *str, int sep)
+{
+    const wchar_t *token;
+    const wchar_t *state = NULL;
+    int cnt = 0;
+
+    token = _wcscqctok(str, sep, &state);
+    while (token) {
+        cnt++;
+        token = _wcscqctok(NULL, sep, &state);
+    }
+    return cnt;
+}
+
+ACR_DECLARE(int) ACR_wcsnqtok(const wchar_t *str)
+{
+    const wchar_t *token;
+    const wchar_t *state = NULL;
+    int cnt = 0;
+
+    token = _wcscqtok(str, &state);
+    while (token) {
+        cnt++;
+        token = _wcscqtok(NULL, &state);
+    }
+    return cnt;
+}
+
 ACR_DECLARE(size_t) ACR_MszStrCountA(const char *s)
 {
     size_t n = 0;

Modified: commons/sandbox/runtime/trunk/src/main/native/test/testsuite.c
URL: http://svn.apache.org/viewvc/commons/sandbox/runtime/trunk/src/main/native/test/testsuite.c?rev=808967&r1=808966&r2=808967&view=diff
==============================================================================
--- commons/sandbox/runtime/trunk/src/main/native/test/testsuite.c (original)
+++ commons/sandbox/runtime/trunk/src/main/native/test/testsuite.c Fri Aug 28 17:41:29 2009
@@ -362,6 +362,48 @@
     return 0;
 }
 
+static const char *test_strings[] = {
+    "The quick brown fox jumps over the lazy dog",
+    " The quick brown fox jumps over the lazy dog ",
+    "\tThe quick\t brown \tfox\t\tjumps \t\tover\t\t the \t\t lazy dog \t ",
+    "The \"quick brown fox\" jumps over the \"lazy dog\"",
+    "The \"quick brown fox\" jumps\\ over the \"lazy dog\"",
+    "The\\ \"quick brown fox\"\\ jumps\\ over\\ the\\ \"lazy dog\"",
+#if 0
+    "Lorem ipsum dolor sit amet, consectetur adipisicing elit,"
+        "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. "
+        "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris "
+        "nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in "
+        "reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla "
+        "pariatur. Excepteur sint occaecat cupidatat non proident, sunt in "
+        "culpa qui officia deserunt mollit anim id est laborum.",
+#endif
+    NULL
+};
+
+static int test_strtok(int argc, const char *const argv[])
+{
+    int i = 0;
+    char buf[2048];
+    char *tok;
+    char *end;
+
+    while (test_strings[i]) {
+        int c = 1;
+        strcpy(buf, test_strings[i]);
+        printf("\nTest %d %s\n", i, buf);
+        printf("        has %d tokens\n", ACR_strnqtok(buf));
+        tok = ACR_strqtok(buf, &end);
+        while (tok) {
+            printf("Test %d.%d : '%s'\n", i, c++, tok);
+            tok = ACR_strqtok(NULL, &end);
+        }
+        i++;
+    }
+    return 0;
+}
+
+
 int main(int argc, const char *const argv[])
 {
     int rv = 0;
@@ -433,6 +475,9 @@
         else if (!strcasecmp(run_test, "ring")) {
             rv = test_ring(argc, argv);
         }
+        else if (!strcasecmp(run_test, "strtok")) {
+            rv = test_strtok(argc, argv);
+        }
     }
 
 cleanup: