You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by mt...@apache.org on 2009/08/27 18:10:38 UTC
svn commit: r808482 - in /commons/sandbox/runtime/trunk/src/main/native:
include/acr_string.h shared/string.c test/testcase.c
Author: mturk
Date: Thu Aug 27 16:10:36 2009
New Revision: 808482
URL: http://svn.apache.org/viewvc?rev=808482&view=rev
Log:
Add few more handy string functions
Modified:
commons/sandbox/runtime/trunk/src/main/native/include/acr_string.h
commons/sandbox/runtime/trunk/src/main/native/shared/string.c
commons/sandbox/runtime/trunk/src/main/native/test/testcase.c
Modified: commons/sandbox/runtime/trunk/src/main/native/include/acr_string.h
URL: http://svn.apache.org/viewvc/commons/sandbox/runtime/trunk/src/main/native/include/acr_string.h?rev=808482&r1=808481&r2=808482&view=diff
==============================================================================
--- commons/sandbox/runtime/trunk/src/main/native/include/acr_string.h (original)
+++ commons/sandbox/runtime/trunk/src/main/native/include/acr_string.h Thu Aug 27 16:10:36 2009
@@ -204,7 +204,7 @@
* @param last internal buffer for maintaining the state.
* @return Token.
*/
-ACR_DECLARE(char *) ACR_strtok_c(char *str, int sep, char **last);
+ACR_DECLARE(char *) ACR_strctok(char *str, int sep, char **last);
/**
* Apache's "replacement" for the wcstok_r() function that uses
@@ -214,7 +214,18 @@
* @param last internal buffer for maintaining the state.
* @return Token.
*/
-ACR_DECLARE(wchar_t *) ACR_wcstok_c(wchar_t *str, int sep, wchar_t **last);
+ACR_DECLARE(wchar_t *) ACR_wcsctok(wchar_t *str, int sep, wchar_t **last);
+
+/**
+ * Apache's "replacement" for the strtok_r() function that uses
+ * a single char instead of a set of delimiters and doesn't tokenize
+ * the strings inside double quotes.
+ * @param str string to tokenize.
+ * @param sep Token delimiting character.
+ * @param last internal buffer for maintaining the state.
+ * @return Token.
+ */
+ACR_DECLARE(char *) ACR_strqctok(char *str, int sep, char **last);
/**
* Determine the number of tokens in string without
@@ -223,7 +234,7 @@
* @param sep Token delimiting character.
* @return Number of tokens.
*/
-ACR_DECLARE(int) ACR_StrTokensA(const char *str, int sep);
+ACR_DECLARE(int)ACR_strnctok(const char *str, int sep);
/**
* Determine the number of tokens in string without
@@ -232,7 +243,65 @@
* @param sep Token delimiting character.
* @return Number of tokens.
*/
-ACR_DECLARE(int) ACR_StrTokensW(const wchar_t *str, int sep);
+ACR_DECLARE(int) ACR_wcsnctok(const wchar_t *str, int sep);
+
+/**
+ * Determine the number of tokens in string without
+ * modifying it and not breaking the quoted strings.
+ * @param str string to tokenize.
+ * @param sep Token delimiting character.
+ * @return Number of tokens.
+ */
+ACR_DECLARE(int)ACR_strnqctok(const char *str, int sep);
+
+/**
+ * Determine the number of tokens in string without
+ * modifying it and not breaking the quoted strings.
+ * @param str string to tokenize.
+ * @param sep Token delimiting character.
+ * @return Number of tokens.
+ */
+ACR_DECLARE(int) ACR_wcsnqctok(const wchar_t *str, int sep);
+
+/**
+ * Determine the number of tokens in string without
+ * modifying it using space characters as delimiters and
+ * not breaking the quoted strings.
+ * @param str string to tokenize.
+ * @return Number of tokens.
+ */
+ACR_DECLARE(int) ACR_strnqtok(const char *str);
+
+/**
+ * Determine the number of tokens in string without
+ * modifying it using space characters as delimiters and
+ * not breaking the quoted strings.
+ * @param str string to tokenize.
+ * @return Number of tokens.
+ */
+ACR_DECLARE(int) ACR_wcsnqtok(const wchar_t *str);
+
+/**
+ * Apache's "replacement" for the strtok_r() function that uses
+ * space characters instead of a set of delimiters and doesn't tokenize
+ * the strings inside double quotes.
+ * @param str string to tokenize.
+ * @param last internal buffer for maintaining the state.
+ * @return Token.
+ */
+ACR_DECLARE(char *) ACR_strqtok(char *str, char **last);
+
+/**
+ * Apache's "replacement" for the wcstok_r() function that uses
+ * space characters instead of a set of delimiters and doesn't tokenize
+ * the strings inside double quotes.
+ * @param str string to tokenize.
+ * @param last internal buffer for maintaining the state.
+ * @return Token.
+ */
+ACR_DECLARE(wchar_t *) ACR_wcsqtok(wchar_t *str, wchar_t **last);
/**
* Count the number of string parts in multi string. Ansi version.
@@ -270,9 +339,28 @@
*/
ACR_DECLARE(jobjectArray) ACR_MszStrToStringArrayW(JNIEnv *env, const wchar_t *s);
+/**
+ * Convert all tab characters in a string to a single space character.
+ * Tabs inside quotes are not converted.
+ * @param str String to use
+ * @return pointer to the string
+ * @note This function is useful to prepare the strings for tokenizing
+ * where the string can contain both tabs and spaces.
+ */
+ACR_DECLARE(char *) ACR_strqtab2ss(char *str);
+
+/**
+ * Convert all tab characters in a string to a single space character.
+ * Tabs inside quotes are not converted.
+ * @param str String to use
+ * @return pointer to the string
+ * @note This function is useful to prepare the strings for tokenizing
+ * where the string can contain both tabs and spaces.
+ */
+ACR_DECLARE(wchar_t *) ACR_wcsqtab2ss(wchar_t *str);
+
#ifdef __cplusplus
}
#endif
#endif /* _ACR_STRING_H */
-
Modified: commons/sandbox/runtime/trunk/src/main/native/shared/string.c
URL: http://svn.apache.org/viewvc/commons/sandbox/runtime/trunk/src/main/native/shared/string.c?rev=808482&r1=808481&r2=808482&view=diff
==============================================================================
--- commons/sandbox/runtime/trunk/src/main/native/shared/string.c (original)
+++ commons/sandbox/runtime/trunk/src/main/native/shared/string.c Thu Aug 27 16:10:36 2009
@@ -928,7 +928,39 @@
return (str[x] != L'\0');
}
-ACR_DECLARE(char *) ACR_strtok_c(char *str, int sep, char **last)
+/*
+ * Find the first occurrence of ch in s1 that is not preceded by a
+ * backslash. Returns a pointer to the match, or NULL when s1 is NULL,
+ * empty, or contains no unescaped ch. The terminating NUL is never
+ * matched.
+ */
+static char *strchr_q(const char *s1, int ch)
+{
+    const char *cur;
+
+    if (s1 == NULL)
+        return NULL;
+    for (cur = s1; *cur != '\0'; cur++) {
+        int c = *cur;
+
+        if (c == ch)
+            return (char *)cur;
+        /* A backslash hides the character that follows it */
+        if (c == '\\' && cur[1] != '\0')
+            cur++;
+    }
+    return NULL;
+}
+
+/*
+ * Wide character variant of strchr_q: find the first occurrence of ch
+ * in s1 that is not preceded by a backslash. Returns a pointer to the
+ * match, or NULL when s1 is NULL, empty, or contains no unescaped ch.
+ * The terminating NUL is never matched.
+ */
+static wchar_t *wcschr_q(const wchar_t *s1, int ch)
+{
+    const wchar_t *cur;
+
+    if (s1 == NULL)
+        return NULL;
+    for (cur = s1; *cur != 0; cur++) {
+        int c = *cur;
+
+        if (c == ch)
+            return (wchar_t *)cur;
+        /* A backslash hides the character that follows it */
+        if (c == L'\\' && cur[1] != 0)
+            cur++;
+    }
+    return NULL;
+}
+
+ACR_DECLARE(char *) ACR_strctok(char *str, int sep, char **last)
{
char *tok;
@@ -950,7 +982,95 @@
}
}
-ACR_DECLARE(int) ACR_StrTokensA(const char *str, int sep)
+/*
+ * Split off the next token delimited by the single character sep.
+ * Pass the string on the first call and NULL afterwards; *last carries
+ * the position between calls and is set to NULL once the input is
+ * exhausted. A token that starts with a double quote extends up to and
+ * including the first unescaped closing quote; the character right
+ * after that quote (if any) is overwritten with NUL. The input string
+ * is modified in place. Returns NULL when no token is left.
+ */
+ACR_DECLARE(char *) ACR_strqctok(char *str, int sep, char **last)
+{
+    char *cur;
+    char *end;
+
+    /* NULL str means: continue after the previously returned token */
+    cur = str ? str : *last;
+    if (cur == NULL)
+        return NULL;
+    /* Skip leading delimiters */
+    for (; *cur == sep; cur++)
+        ;
+    if (*cur != '"') {
+        /* Plain token: ends at the first unescaped delimiter */
+        end = strchr_q(cur, sep);
+        if (end == NULL) {
+            *last = NULL;
+            /* Nothing but an empty tail means no token at all */
+            return *cur ? cur : NULL;
+        }
+        *end = '\0';
+        *last = end + 1;
+        return cur;
+    }
+    /* Quoted token: look for the first unescaped closing quote */
+    for (end = cur + 1; *end != '\0'; end++) {
+        if (*end == '"') {
+            if (end[1] != '\0') {
+                end[1] = '\0';
+                *last = end + 2;
+            }
+            else
+                *last = NULL;
+            return cur;
+        }
+        if (*end == '\\' && end[1] != '\0')
+            end++;
+    }
+    /* Unterminated quote: the rest of the string is the last token */
+    *last = NULL;
+    return cur;
+}
+
+/*
+ * Split off the next blank delimited token. Leading characters that
+ * acr_isspace() accepts are skipped; an unquoted token ends at the
+ * first space or tab. Pass the string on the first call and NULL
+ * afterwards; *last carries the position between calls and is set to
+ * NULL once the input is exhausted. A token that starts with a double
+ * quote extends up to and including the first unescaped closing quote;
+ * the character right after that quote (if any) is overwritten with
+ * NUL. The input string is modified in place. Returns NULL when no
+ * token is left.
+ */
+ACR_DECLARE(char *) ACR_strqtok(char *str, char **last)
+{
+    char *cur;
+    char *end;
+
+    /* NULL str means: continue after the previously returned token */
+    cur = str ? str : *last;
+    if (cur == NULL)
+        return NULL;
+    /* Skip leading delimiters */
+    for (; *cur && acr_isspace(*cur); cur++)
+        ;
+    if (*cur != '"') {
+        /* Plain token: ends at the first space or tab */
+        end = strpbrk(cur, " \t");
+        if (end == NULL) {
+            *last = NULL;
+            /* Nothing but an empty tail means no token at all */
+            return *cur ? cur : NULL;
+        }
+        *end = '\0';
+        *last = end + 1;
+        return cur;
+    }
+    /* Quoted token: look for the first unescaped closing quote */
+    for (end = cur + 1; *end != '\0'; end++) {
+        if (*end == '"') {
+            if (end[1] != '\0') {
+                end[1] = '\0';
+                *last = end + 2;
+            }
+            else
+                *last = NULL;
+            return cur;
+        }
+        if (*end == '\\' && end[1] != '\0')
+            end++;
+    }
+    /* Unterminated quote: the rest of the string is the last token */
+    *last = NULL;
+    return cur;
+}
+
+ACR_DECLARE(int) ACR_strnctok(const char *str, int sep)
{
int cnt = 1;
@@ -969,13 +1089,27 @@
return cnt;
}
-ACR_DECLARE(int) ACR_StrTokensW(const wchar_t *str, int sep)
+ACR_DECLARE(int) ACR_strnqctok(const char *str, int sep)
{
int cnt = 1;
- while (*str && *str == (wchar_t)sep) /* skip leading delimiters */
+ while (*str == sep) /* skip leading delimiters */
str++;
while (*str) {
+ if (*str == '"') {
+ int ch;
+ str++;
+ /* Advance to the first unescaped quote */
+ while ((ch = *str++) != 0) {
+ if (ch == '"') {
+ cnt++;
+ break;
+ }
+ if (ch == '\\' && *str)
+ str++;
+ }
+ continue;
+ }
if (*str == sep) {
while (*str == sep)
str++;
@@ -988,7 +1122,59 @@
return cnt;
}
-ACR_DECLARE(wchar_t *) ACR_wcstok_c(wchar_t *str, int sep, wchar_t **last)
+/*
+ * Count the tokens of str without modifying it, using the characters
+ * accepted by acr_isspace() as delimiters and treating double quoted
+ * sections (with backslash escapes) as unbreakable.
+ * The count starts at 1, so an empty string yields 1.
+ * NOTE(review): the count is also incremented at every closing quote,
+ * so the result can be larger than the number of tokens ACR_strqtok()
+ * yields for the same input - confirm callers only need an upper bound.
+ */
+ACR_DECLARE(int) ACR_strnqtok(const char *str)
+{
+    int cnt = 1;
+
+    while (*str && acr_isspace(*str)) /* skip leading delimiters */
+        str++;
+    while (*str) {
+        if (*str == '"') {
+            str++;
+            /* Advance to the first unescaped quote.
+             * The terminator is tested before it is consumed, so an
+             * unterminated quote leaves str on the NUL instead of one
+             * past it and the outer loop ends cleanly.
+             */
+            while (*str) {
+                int ch = *str++;
+
+                if (ch == '"') {
+                    cnt++;
+                    break;
+                }
+                if (ch == '\\' && *str)
+                    str++;
+            }
+            continue;
+        }
+        if (acr_isspace(*str)) {
+            while (acr_isspace(*str))
+                str++;
+            /* Trailing delimiters do not start another token */
+            if (*str)
+                cnt++;
+        }
+        else
+            str++;
+    }
+    return cnt;
+}
+
+/*
+ * Count the tokens of a wide string delimited by the single character
+ * sep, without modifying the string. Runs of consecutive delimiters
+ * count as one and leading or trailing delimiters add nothing.
+ * The count starts at 1, so an empty string yields 1.
+ */
+ACR_DECLARE(int) ACR_wcsnctok(const wchar_t *str, int sep)
+{
+    const wchar_t  delim  = (wchar_t)sep;
+    const wchar_t *p      = str;
+    int            tokens = 1;
+
+    /* Skip leading delimiters */
+    while (*p != 0 && *p == delim)
+        p++;
+    while (*p != 0) {
+        if (*p != delim) {
+            p++;
+            continue;
+        }
+        /* Collapse the whole run of delimiters */
+        do {
+            p++;
+        } while (*p == delim);
+        /* Only a run that is followed by text starts a new token */
+        if (*p != 0)
+            tokens++;
+    }
+    return tokens;
+}
+
+ACR_DECLARE(wchar_t *) ACR_wcsctok(wchar_t *str, int sep, wchar_t **last)
{
wchar_t *tok;
@@ -1010,6 +1196,127 @@
}
}
+/*
+ * Count the tokens of a wide string without modifying it, using the
+ * characters accepted by iswspace() as delimiters and treating double
+ * quoted sections (with backslash escapes) as unbreakable.
+ * The count starts at 1, so an empty string yields 1.
+ * NOTE(review): the count is also incremented at every closing quote,
+ * so the result can be larger than the number of tokens ACR_wcsqtok()
+ * yields for the same input - confirm callers only need an upper bound.
+ */
+ACR_DECLARE(int) ACR_wcsnqtok(const wchar_t *str)
+{
+    int cnt = 1;
+
+    while (*str && iswspace(*str)) /* skip leading delimiters */
+        str++;
+    while (*str) {
+        if (*str == L'"') {
+            str++;
+            /* Advance to the first unescaped quote.
+             * The terminator is tested before it is consumed, so an
+             * unterminated quote leaves str on the NUL instead of one
+             * past it and the outer loop ends cleanly.
+             */
+            while (*str) {
+                wchar_t ch = *str++;
+
+                if (ch == L'"') {
+                    cnt++;
+                    break;
+                }
+                if (ch == L'\\' && *str)
+                    str++;
+            }
+            continue;
+        }
+        if (iswspace(*str)) {
+            while (iswspace(*str))
+                str++;
+            /* Trailing delimiters do not start another token */
+            if (*str)
+                cnt++;
+        }
+        else
+            str++;
+    }
+    return cnt;
+}
+
+/*
+ * Wide character variant of ACR_strqctok: split off the next token
+ * delimited by the single character sep. Pass the string on the first
+ * call and NULL afterwards; *last carries the position between calls
+ * and is set to NULL once the input is exhausted. A token that starts
+ * with a double quote extends up to and including the first unescaped
+ * closing quote; the character right after that quote (if any) is
+ * overwritten with NUL. The input string is modified in place.
+ * Returns NULL when no token is left.
+ */
+ACR_DECLARE(wchar_t *) ACR_wcsqctok(wchar_t *str, int sep, wchar_t **last)
+{
+    wchar_t *cur;
+    wchar_t *end;
+
+    /* NULL str means: continue after the previously returned token */
+    cur = str ? str : *last;
+    if (cur == NULL)
+        return NULL;
+    /* Skip leading delimiters */
+    for (; *cur == sep; cur++)
+        ;
+    if (*cur != L'"') {
+        /* Plain token: ends at the first unescaped delimiter */
+        end = wcschr_q(cur, sep);
+        if (end == NULL) {
+            *last = NULL;
+            /* Nothing but an empty tail means no token at all */
+            return *cur ? cur : NULL;
+        }
+        *end = L'\0';
+        *last = end + 1;
+        return cur;
+    }
+    /* Quoted token: look for the first unescaped closing quote */
+    for (end = cur + 1; *end != 0; end++) {
+        if (*end == L'"') {
+            if (end[1] != 0) {
+                end[1] = L'\0';
+                *last = end + 2;
+            }
+            else
+                *last = NULL;
+            return cur;
+        }
+        if (*end == L'\\' && end[1] != 0)
+            end++;
+    }
+    /* Unterminated quote: the rest of the string is the last token */
+    *last = NULL;
+    return cur;
+}
+
+/*
+ * Split off the next blank delimited token of a wide string. Leading
+ * characters that iswspace() accepts are skipped; an unquoted token
+ * ends at the first space or tab. Pass the string on the first call
+ * and NULL afterwards; *last carries the position between calls and is
+ * set to NULL once the input is exhausted. A token that starts with a
+ * double quote extends up to and including the first unescaped closing
+ * quote; the character right after that quote (if any) is overwritten
+ * with NUL. The input string is modified in place. Returns NULL when
+ * no token is left.
+ */
+ACR_DECLARE(wchar_t *) ACR_wcsqtok(wchar_t *str, wchar_t **last)
+{
+    wchar_t *tok;
+
+    if (!str)           /* subsequent call */
+        str = *last;    /* start where we left off */
+    if (!str)           /* no more tokens */
+        return NULL;
+    /* Skip leading delimiters. Wide characters are classified with
+     * iswspace(), the same classifier ACR_wcsnqtok() uses, instead of
+     * the narrow character acr_isspace().
+     */
+    while (*str && iswspace(*str))
+        str++;
+    if (*str == L'"') {
+        int ch;
+        /* Advance to the first unescaped quote */
+        tok = str + 1;
+        while ((ch = *tok++) != 0) {
+            if (ch == L'"') {
+                if (*tok) {
+                    /* Cut the token right after the closing quote */
+                    *tok++ = L'\0';
+                    *last = tok;
+                }
+                else
+                    *last = NULL;
+                return str;
+            }
+            if (ch == L'\\' && *tok)
+                tok++;
+        }
+        /* Unterminated quote: the rest of the string is the last token */
+        *last = NULL;
+        return *str ? str : NULL;
+    }
+    if ((tok = wcspbrk(str, L" \t"))) {
+        *tok++ = L'\0';
+        *last = tok;
+        return str;
+    }
+    else {
+        *last = NULL;
+        /* Check for last empty token */
+        return *str ? str : NULL;
+    }
+}
+
ACR_DECLARE(size_t) ACR_MszStrCountA(const char *s)
{
size_t n = 0;
@@ -1092,3 +1399,52 @@
return arr;
}
+/*
+ * Replace every tab outside of double quoted sections with a single
+ * space. Quoted sections honour backslash escapes and are left
+ * untouched. The string is modified in place and the original pointer
+ * is returned.
+ */
+ACR_DECLARE(char *) ACR_strqtab2ss(char *str)
+{
+    char *ptr = str;
+
+    while (*str) {
+        if (*str == '"') {
+            str++;
+            /* Advance to the first unescaped quote.
+             * The terminator is tested before it is consumed, so an
+             * unterminated quote leaves str on the NUL instead of one
+             * past it; otherwise the outer loop would read, and
+             * possibly write, beyond the end of the buffer.
+             */
+            while (*str) {
+                int ch = *str++;
+
+                if (ch == '"')
+                    break;
+                if (ch == '\\' && *str)
+                    str++;
+            }
+            continue;
+        }
+        if (*str == '\t')
+            *str = ' ';
+        str++;
+    }
+    return ptr;
+}
+
+/*
+ * Replace every tab outside of double quoted sections of a wide string
+ * with a single space. Quoted sections honour backslash escapes and
+ * are left untouched. The string is modified in place and the original
+ * pointer is returned.
+ */
+ACR_DECLARE(wchar_t *) ACR_wcsqtab2ss(wchar_t *str)
+{
+    wchar_t *ptr = str;
+
+    while (*str) {
+        if (*str == L'"') {
+            str++;
+            /* Advance to the first unescaped quote.
+             * The terminator is tested before it is consumed, so an
+             * unterminated quote leaves str on the NUL instead of one
+             * past it; otherwise the outer loop would read, and
+             * possibly write, beyond the end of the buffer.
+             */
+            while (*str) {
+                wchar_t ch = *str++;
+
+                if (ch == L'"')
+                    break;
+                if (ch == L'\\' && *str)
+                    str++;
+            }
+            continue;
+        }
+        if (*str == L'\t')
+            *str = L' ';
+        str++;
+    }
+    return ptr;
+}
Modified: commons/sandbox/runtime/trunk/src/main/native/test/testcase.c
URL: http://svn.apache.org/viewvc/commons/sandbox/runtime/trunk/src/main/native/test/testcase.c?rev=808482&r1=808481&r2=808482&view=diff
==============================================================================
--- commons/sandbox/runtime/trunk/src/main/native/test/testcase.c (original)
+++ commons/sandbox/runtime/trunk/src/main/native/test/testcase.c Thu Aug 27 16:10:36 2009
@@ -474,11 +474,11 @@
char *state;
sprintf(buf, " 1 22 3333 4");
- e = ACR_StrTokensA(buf, ' ');
- token = ACR_strtok_c(buf, ' ', &state);
+ e = ACR_strnctok(buf, ' ');
+ token = ACR_strctok(buf, ' ', &state);
if (token) {
n++;
- while ((token = ACR_strtok_c(NULL, ' ', &state))) {
+ while ((token = ACR_strctok(NULL, ' ', &state))) {
n++;
if (n > 20)
break;