You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by mt...@apache.org on 2009/08/28 19:41:29 UTC
svn commit: r808967 - in /commons/sandbox/runtime/trunk/src/main/native:
shared/string.c test/testsuite.c
Author: mturk
Date: Fri Aug 28 17:41:29 2009
New Revision: 808967
URL: http://svn.apache.org/viewvc?rev=808967&view=rev
Log:
Fix token counters
Modified:
commons/sandbox/runtime/trunk/src/main/native/shared/string.c
commons/sandbox/runtime/trunk/src/main/native/test/testsuite.c
Modified: commons/sandbox/runtime/trunk/src/main/native/shared/string.c
URL: http://svn.apache.org/viewvc/commons/sandbox/runtime/trunk/src/main/native/shared/string.c?rev=808967&r1=808966&r2=808967&view=diff
==============================================================================
--- commons/sandbox/runtime/trunk/src/main/native/shared/string.c (original)
+++ commons/sandbox/runtime/trunk/src/main/native/shared/string.c Fri Aug 28 17:41:29 2009
@@ -944,6 +944,22 @@
return NULL;
}
+static char *strspc_q(const char *s1)
+{
+ int s0; /* character most recently read from s1 */
+ /* Return a pointer to the first character of s1 for which acr_isspace()
+  * is true and which was not skipped as an escaped character. Returns
+  * NULL when s1 is NULL, empty, or holds no such character. The result
+  * points into s1 with the const qualifier cast away.
+  */
+ /* Some early sanity check */
+ if (!s1 || !*s1)
+ return NULL;
+ while ((s0 = *s1++) != 0) {
+ if (acr_isspace(s0))
+ return (char *)(s1 - 1); /* s1 is already one past s0 */
+ if (s0 == '\\' && *s1)
+ s1++; /* We have something escaped. Advance */
+ }
+ return NULL;
+}
+
static wchar_t *wcschr_q(const wchar_t *s1, int ch)
{
int s0;
@@ -960,6 +976,22 @@
return NULL;
}
+static wchar_t *wcsspc_q(const wchar_t *s1)
+{
+ int s0; /* wide character most recently read from s1 */
+ /* Return a pointer to the first character of s1 for which iswspace()
+  * is true and which was not skipped as an escaped character. Returns
+  * NULL when s1 is NULL, empty, or holds no such character. The result
+  * points into s1 with the const qualifier cast away.
+  */
+ /* Some early sanity check */
+ if (!s1 || !*s1)
+ return NULL;
+ while ((s0 = *s1++) != 0) {
+ if (iswspace(s0))
+ return (wchar_t *)(s1 - 1); /* s1 is already one past s0 */
+ if (s0 == L'\\' && *s1)
+ s1++; /* We have something escaped. Advance */
+ }
+ return NULL;
+}
+
ACR_DECLARE(char *) ACR_strctok(char *str, int sep, char **last)
{
char *tok;
@@ -1028,6 +1060,7 @@
ACR_DECLARE(char *) ACR_strqtok(char *str, char **last)
{
+ int chr;
char *tok;
if (!str) /* subsequent call */
@@ -1037,11 +1070,10 @@
while (*str && acr_isspace(*str)) /* skip leading delimiters */
str++;
if (*str == '"') {
- int ch;
/* Advance to the first unescaped quote */
tok = str + 1;
- while ((ch = *tok++) != 0) {
- if (ch == '"') {
+ while ((chr = *tok++) != 0) {
+ if (chr == '"') {
if (*tok) {
*tok++ = '\0';
*last = tok;
@@ -1050,7 +1082,7 @@
*last = NULL;
return str;
}
- if (ch == '\\' && *tok)
+ if (chr == '\\' && *tok)
tok++;
}
/* Unterminated quote */
@@ -1058,7 +1090,7 @@
/* Check for last empty token */
return *str ? str : NULL;
}
- if ((tok = strpbrk(str, " \t"))) {
+ if ((tok = strspc_q(str))) {
*tok++ = '\0';
*last = tok;
return str;
@@ -1070,81 +1102,102 @@
}
}
-ACR_DECLARE(int) ACR_strnctok(const char *str, int sep)
+static const char *_strcqctok(const char *str, int sep, const char **last)
{
- int cnt = 1;
+ const char *tok;
- while (*str && *str == sep) /* skip leading delimiters */
- str++;
- while (*str) {
- if (*str == sep) {
- while (*str == sep)
- str++;
- if (*str)
- cnt++;
+ if (!str) /* subsequent call */
+ str = *last; /* start where we left off */
+ if (!str) /* no more tokens */
+ return NULL;
+ while (*str == sep) /* skip leading delimiters */
+ str++;
+ if (*str == '"') {
+ int ch;
+ /* Advance to the first unescaped quote */
+ tok = str + 1;
+ while ((ch = *tok++) != 0) {
+ if (ch == '"') {
+ if (*tok) {
+ *last = ++tok;
+ }
+ else
+ *last = NULL;
+ return str;
+ }
+ if (ch == '\\' && *tok)
+ tok++;
}
- else
- str++;
+ /* Unterminated quote */
+ *last = NULL;
+ /* Check for last empty token */
+ return *str ? str : NULL;
+ }
+ if ((tok = strchr_q(str, sep))) {
+ *last = ++tok;
+ return str;
+ }
+ else {
+ *last = NULL;
+ /* Check for last empty token */
+ return *str ? str : NULL;
}
- return cnt;
}
-ACR_DECLARE(int) ACR_strnqctok(const char *str, int sep)
+/* Same as ACR_strqtok, but doesn't modify the string.
+ * Used internally for token counting.
+ */
+static const char *_strcqtok(const char *str, const char **last)
{
- int cnt = 1;
+ int chr;
+ const char *tok;
- while (*str == sep) /* skip leading delimiters */
- str++;
- while (*str) {
- if (*str == '"') {
- int ch;
- str++;
- /* Advance to the first unescaped quote */
- while ((ch = *str++) != 0) {
- if (ch == '"') {
- cnt++;
- break;
+ if (!str) /* subsequent call */
+ str = *last; /* start where we left off */
+ if (!str) /* no more tokens */
+ return NULL;
+ while (*str && acr_isspace(*str)) /* skip leading delimiters */
+ str++;
+ if (*str == '"') {
+ /* Advance to the first unescaped quote */
+ tok = str + 1;
+ while ((chr = *tok++) != 0) {
+ if (chr == '"') {
+ if (*tok) {
+ *last = ++tok;
}
- if (ch == '\\' && *str)
- str++;
+ else
+ *last = NULL;
+ return str;
}
- continue;
- }
- if (*str == sep) {
- while (*str == sep)
- str++;
- if (*str)
- cnt++;
+ if (chr == '\\' && *tok)
+ tok++;
}
- else
- str++;
+ /* Unterminated quote */
+ *last = NULL;
+ /* Check for last empty token */
+ return *str ? str : NULL;
+ }
+ if ((tok = strspc_q(str))) {
+ *last = ++tok;
+ return str;
+ }
+ else {
+ *last = NULL;
+ /* Check for last empty token */
+ return *str ? str : NULL;
}
- return cnt;
}
-ACR_DECLARE(int) ACR_strnqtok(const char *str)
+ACR_DECLARE(int) ACR_strnctok(const char *str, int sep)
{
int cnt = 1;
- while (*str && acr_isspace(*str)) /* skip leading delimiters */
+ while (*str && *str == sep) /* skip leading delimiters */
str++;
while (*str) {
- if (*str == '"') {
- int ch;
- str++;
- /* Advance to the first unescaped quote */
- while ((ch = *str++) != 0) {
- if (ch == '"') {
- cnt++;
- break;
- }
- if (ch == '\\' && *str)
- str++;
- }
- continue;
- }
- if (acr_isspace(*str)) {
- while (acr_isspace(*str))
+ if (*str == sep) {
+ while (*str == sep)
str++;
if (*str)
cnt++;
@@ -1155,21 +1208,30 @@
return cnt;
}
-ACR_DECLARE(int) ACR_wcsnctok(const wchar_t *str, int sep)
+ACR_DECLARE(int) ACR_strnqctok(const char *str, int sep)
{
- int cnt = 1;
+ const char *token;
+ const char *state = NULL;
+ int cnt = 0;
+
+ token = _strcqctok(str, sep, &state);
+ while (token) {
+ cnt++;
+ token = _strcqctok(NULL, sep, &state);
+ }
+ return cnt;
+}
- while (*str && *str == (wchar_t)sep) /* skip leading delimiters */
- str++;
- while (*str) {
- if (*str == (wchar_t)sep) {
- while (*str == (wchar_t)sep)
- str++;
- if (*str)
- cnt++;
- }
- else
- str++;
+ACR_DECLARE(int) ACR_strnqtok(const char *str)
+{
+ const char *token;
+ const char *state = NULL;
+ int cnt = 0;
+
+ token = _strcqtok(str, &state);
+ while (token) {
+ cnt++;
+ token = _strcqtok(NULL, &state);
}
return cnt;
}
@@ -1196,39 +1258,6 @@
}
}
-ACR_DECLARE(int) ACR_wcsnqtok(const wchar_t *str)
-{
- int cnt = 1;
-
- while (*str && iswspace(*str)) /* skip leading delimiters */
- str++;
- while (*str) {
- if (*str == L'"') {
- wchar_t ch;
- str++;
- /* Advance to the first unescaped quote */
- while ((ch = *str++) != 0) {
- if (ch == L'"') {
- cnt++;
- break;
- }
- if (ch == L'\\' && *str)
- str++;
- }
- continue;
- }
- if (iswspace(*str)) {
- while (iswspace(*str))
- str++;
- if (*str)
- cnt++;
- }
- else
- str++;
- }
- return cnt;
-}
-
ACR_DECLARE(wchar_t *) ACR_wcsqctok(wchar_t *str, int sep, wchar_t **last)
{
wchar_t *tok;
@@ -1305,7 +1334,7 @@
/* Check for last empty token */
return *str ? str : NULL;
}
- if ((tok = wcspbrk(str, L" \t"))) {
+ if ((tok = wcsspc_q(str))) {
*tok++ = L'\0';
*last = tok;
return str;
@@ -1317,6 +1346,137 @@
}
}
+static const wchar_t *_wcscqctok(const wchar_t *str, int sep, const wchar_t **last)
+{
+ const wchar_t *tok;
+ /* Return the start of the next sep-delimited token without writing to
+  * the string. Pass the string on the first call and NULL afterwards;
+  * *last carries the resume position between calls and is set to NULL
+  * once the string is exhausted. Only the token start is returned: the
+  * token is not terminated, so callers cannot read it as a standalone
+  * string. Returns NULL when no token is left.
+  */
+ if (!str) /* subsequent call */
+ str = *last; /* start where we left off */
+ if (!str) /* no more tokens */
+ return NULL;
+ while (*str == sep) /* skip leading delimiters */
+ str++;
+ if (*str == L'"') {
+ int ch;
+ /* Advance to the first unescaped quote */
+ tok = str + 1;
+ while ((ch = *tok++) != 0) {
+ if (ch == L'"') {
+ if (*tok) {
+ *last = ++tok; /* resume one past the character that follows the closing quote; that character is not compared with sep */
+ }
+ else
+ *last = NULL;
+ return str; /* token start is the opening quote itself */
+ }
+ if (ch == L'\\' && *tok)
+ tok++; /* skip the escaped character */
+ }
+ /* Unterminated quote */
+ *last = NULL;
+ /* Check for last empty token */
+ return *str ? str : NULL;
+ }
+ if ((tok = wcschr_q(str, sep))) { /* presumably the next unescaped sep; wcschr_q's body is not fully visible here */
+ *last = ++tok; /* resume right after the separator */
+ return str;
+ }
+ else {
+ *last = NULL;
+ /* Check for last empty token */
+ return *str ? str : NULL;
+ }
+}
+
+/* Return the start of the next whitespace-delimited token of a wide
+ * string without modifying it. Used internally for token counting.
+ *
+ * str:  string to scan on the first call, NULL on subsequent calls.
+ * last: scan state; receives the position to resume from, or NULL once
+ *       the string is exhausted.
+ *
+ * A token that starts with '"' runs up to the next unescaped '"'.
+ * Only the token start is returned; the token is not terminated.
+ * Returns NULL when no token is left.
+ */
+static const wchar_t *_wcscqtok(const wchar_t *str, const wchar_t **last)
+{
+ const wchar_t *tok;
+
+ if (!str) /* subsequent call */
+ str = *last; /* start where we left off */
+ if (!str) /* no more tokens */
+ return NULL;
+ /* Classify wide characters with iswspace(), the same predicate
+  * wcsspc_q() uses to find the end of a token, so that leading and
+  * trailing delimiters are recognised identically.
+  */
+ while (*str && iswspace(*str)) /* skip leading delimiters */
+ str++;
+ if (*str == L'"') {
+ int ch;
+ /* Advance to the first unescaped quote */
+ tok = str + 1;
+ while ((ch = *tok++) != 0) {
+ if (ch == L'"') {
+ if (*tok) {
+ /* Resume one past the character following the quote */
+ *last = ++tok;
+ }
+ else
+ *last = NULL;
+ return str;
+ }
+ if (ch == L'\\' && *tok)
+ tok++; /* skip the escaped character */
+ }
+ /* Unterminated quote */
+ *last = NULL;
+ /* Check for last empty token */
+ return *str ? str : NULL;
+ }
+ if ((tok = wcsspc_q(str))) {
+ *last = ++tok; /* resume right after the whitespace character */
+ return str;
+ }
+ else {
+ *last = NULL;
+ /* Check for last empty token */
+ return *str ? str : NULL;
+ }
+}
+
+ACR_DECLARE(int) ACR_wcsnctok(const wchar_t *str, int sep)
+{
+ int cnt = 1; /* counting starts at one, so an empty or all-separator string still reports 1 */
+ /* Count the tokens of str that are delimited by runs of sep. Quotes and
+  * backslash escapes get no special treatment here. str is dereferenced
+  * unconditionally, so it must not be NULL.
+  */
+ while (*str && *str == (wchar_t)sep) /* skip leading delimiters */
+ str++;
+ while (*str) {
+ if (*str == (wchar_t)sep) {
+ while (*str == (wchar_t)sep) /* collapse a run of separators */
+ str++;
+ if (*str) /* trailing separators do not start a new token */
+ cnt++;
+ }
+ else
+ str++;
+ }
+ return cnt;
+}
+
+ACR_DECLARE(int) ACR_wcsnqctok(const wchar_t *str, int sep)
+{
+ const wchar_t *token;
+ const wchar_t *state = NULL; /* resume position kept by _wcscqctok() between calls */
+ int cnt = 0;
+ /* Count the tokens of str by calling _wcscqctok() until it returns
+  * NULL. Returns the number of non-NULL results, which is 0 when the
+  * very first call already yields NULL.
+  */
+ token = _wcscqctok(str, sep, &state);
+ while (token) {
+ cnt++;
+ token = _wcscqctok(NULL, sep, &state); /* NULL continues from state */
+ }
+ return cnt;
+}
+
+ACR_DECLARE(int) ACR_wcsnqtok(const wchar_t *str)
+{
+ const wchar_t *token;
+ const wchar_t *state = NULL; /* resume position kept by _wcscqtok() between calls */
+ int cnt = 0;
+ /* Count the tokens of str by calling _wcscqtok() until it returns
+  * NULL. Returns the number of non-NULL results, which is 0 when the
+  * very first call already yields NULL.
+  */
+ token = _wcscqtok(str, &state);
+ while (token) {
+ cnt++;
+ token = _wcscqtok(NULL, &state); /* NULL continues from state */
+ }
+ return cnt;
+}
+
ACR_DECLARE(size_t) ACR_MszStrCountA(const char *s)
{
size_t n = 0;
Modified: commons/sandbox/runtime/trunk/src/main/native/test/testsuite.c
URL: http://svn.apache.org/viewvc/commons/sandbox/runtime/trunk/src/main/native/test/testsuite.c?rev=808967&r1=808966&r2=808967&view=diff
==============================================================================
--- commons/sandbox/runtime/trunk/src/main/native/test/testsuite.c (original)
+++ commons/sandbox/runtime/trunk/src/main/native/test/testsuite.c Fri Aug 28 17:41:29 2009
@@ -362,6 +362,48 @@
return 0;
}
+static const char *test_strings[] = { /* inputs walked by test_strtok(); the list ends at the NULL entry */
+ "The quick brown fox jumps over the lazy dog",
+ " The quick brown fox jumps over the lazy dog ",
+ "\tThe quick\t brown \tfox\t\tjumps \t\tover\t\t the \t\t lazy dog \t ",
+ "The \"quick brown fox\" jumps over the \"lazy dog\"",
+ "The \"quick brown fox\" jumps\\ over the \"lazy dog\"",
+ "The\\ \"quick brown fox\"\\ jumps\\ over\\ the\\ \"lazy dog\"",
+#if 0 /* long multi-sentence sample, currently compiled out */
+ "Lorem ipsum dolor sit amet, consectetur adipisicing elit,"
+ "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. "
+ "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris "
+ "nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in "
+ "reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla "
+ "pariatur. Excepteur sint occaecat cupidatat non proident, sunt in "
+ "culpa qui officia deserunt mollit anim id est laborum.",
+#endif
+ NULL /* sentinel that stops the loop in test_strtok() */
+};
+
+static int test_strtok(int argc, const char *const argv[])
+{
+ int i = 0;
+ char buf[2048]; /* writable copy of the current test string */
+ char *tok;
+ char *end; /* tokenizer state passed to ACR_strqtok() */
+ /* For every entry of test_strings: print the string, print the token
+  * count reported by ACR_strnqtok(), then split the copy with
+  * ACR_strqtok() and print each token. argc and argv are not used.
+  * Always returns 0; the output is not checked against expected values.
+  */
+ while (test_strings[i]) {
+ int c = 1; /* 1-based token index used in the output */
+ strcpy(buf, test_strings[i]); /* unbounded copy: relies on every test string fitting in buf */
+ printf("\nTest %d %s\n", i, buf);
+ printf(" has %d tokens\n", ACR_strnqtok(buf)); /* counted before buf is tokenized below */
+ tok = ACR_strqtok(buf, &end);
+ while (tok) {
+ printf("Test %d.%d : '%s'\n", i, c++, tok);
+ tok = ACR_strqtok(NULL, &end);
+ }
+ i++;
+ }
+ return 0;
+}
+
+
int main(int argc, const char *const argv[])
{
int rv = 0;
@@ -433,6 +475,9 @@
else if (!strcasecmp(run_test, "ring")) {
rv = test_ring(argc, argv);
}
+ else if (!strcasecmp(run_test, "strtok")) {
+ rv = test_strtok(argc, argv);
+ }
}
cleanup: