You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stdcxx.apache.org by se...@apache.org on 2008/03/12 23:13:22 UTC

svn commit: r636534 - /stdcxx/trunk/src/wcodecvt.cpp

Author: sebor
Date: Wed Mar 12 15:13:18 2008
New Revision: 636534

URL: http://svn.apache.org/viewvc?rev=636534&view=rev
Log:
2008-03-12  Martin Sebor  <se...@roguewave.com>

	STDCXX-435
	* src/wcodecvt.cpp (__rw_libc_do_in): Replaced uses of mbsrtowcs()
	with mbrtowc() to prevent reading past the end of source sequences
	that aren't NUL-terminated.

Modified:
    stdcxx/trunk/src/wcodecvt.cpp

Modified: stdcxx/trunk/src/wcodecvt.cpp
URL: http://svn.apache.org/viewvc/stdcxx/trunk/src/wcodecvt.cpp?rev=636534&r1=636533&r2=636534&view=diff
==============================================================================
--- stdcxx/trunk/src/wcodecvt.cpp (original)
+++ stdcxx/trunk/src/wcodecvt.cpp Wed Mar 12 15:13:18 2008
@@ -22,7 +22,7 @@
  * implied.   See  the License  for  the  specific language  governing
  * permissions and limitations under the License.
  *
- * Copyright 2001-2007 Rogue Wave Software, Inc.
+ * Copyright 2001-2008 Rogue Wave Software, Inc.
  * 
  **************************************************************************/
 
@@ -342,9 +342,6 @@
 }
 
 
-//  This returns two result codes:  error and ok. The partial error result
-//  is not  returned because there  is no way  to know whether or  not the
-//  input sequence contains any more valid characters.
 static _STD::codecvt_base::result
 __rw_libc_do_in (_RWSTD_MBSTATE_T &state,
                  const char       *from, 
@@ -359,95 +356,59 @@
 
     _STD::codecvt_base::result res = _STD::codecvt_base::ok;
 
-    _RWSTD_MBSTATE_T save_state = state;   // saved state before conversion
+    // compute the length of the source sequence in bytes and
+    // the size of the destination buffer in wide characters
+    _RWSTD_SIZE_T src_len  = from_end - from;
+    _RWSTD_SIZE_T dst_size = to_limit - to;
+
+    // set the initial values to the source and destination pointers
+    const char* psrc = from;
+    wchar_t*    pdst = to;
 
-    _RWSTD_SIZE_T src_len = from_end - from;   // source length
-    _RWSTD_SIZE_T dst_len = to_limit - to;       // destination length
+    while (dst_size && src_len) {
 
-    const char*   psrc = from_next ? from_next : "";   // source
-    wchar_t*      pdst = to_next;                      // destination
-
-#ifndef _RWSTD_NO_MBSRTOWCS
-    // mbsrtowcs() requires the input to be a NULL-terminated string
-    const _RWSTD_SIZE_T ret = mbsrtowcs (pdst, &psrc, dst_len, &state);
-#else   // if defined (_RWSTD_NO_MBSRTOWCS)
-    const _RWSTD_SIZE_T ret = _RWSTD_SIZE_MAX;
-#endif    // _RWSTD_NO_MBSRTOWCS
-
-    // if an error occurred during the restartable function
-    // or if that function is not available
-    if (_RWSTD_SIZE_MAX == ret) {
-        // the conversion here (besides the previous failure) is done 
-        // one character at a time because the non-reentrant/restartable 
-        // counterpart of mbsrtowcs() does not provide any information
-        // about the size of the input that has been processed.
-        _RWSTD_UNUSED (state);
-
-        // restore `psrc' value
-        psrc = from_next ? from_next : "";
-
-        while (dst_len && src_len) {
-
-            _RWSTD_SIZE_T tmp;
+        // the number of bytes that form the next multibyte character
+        _RWSTD_SIZE_T nbytes;
 
 #ifndef _RWSTD_NO_MBRTOWC
-            tmp = mbrtowc (pdst, psrc, src_len, &state);
+        nbytes = mbrtowc (pdst, psrc, src_len, &state);
 #elif !defined (_RWSTD_NO_MBTOWC)
-            tmp = mbtowc (pdst, psrc, src_len);
+        nbytes = mbtowc (pdst, psrc, src_len);
 #else
-            tmp = _RWSTD_SIZE_MAX;
+        nbytes = _RWSTD_SIZE_MAX;
 #endif
 
-            // error; -1 result comes only from an illegal sequence
-            if (_RWSTD_SIZE_MAX == tmp) {
-                res = _STD::codecvt_base::error;
-                break;
-            }
- 
-            // not enough bytes in input to form a valid 
-            // character - translates to an ok result
-            if (tmp == (_RWSTD_SIZE_T)(-2))
-                break;
-
-            // the multibyte sequence is the NULL character
-            if (tmp == 0) 
-                tmp++;
-
-            // adjust the pointers
-            psrc    += tmp;
-            src_len -= tmp;
-            ++pdst;
-            --dst_len;
+        // -1 indicates an invalid sequence (i.e., error)
+        if (nbytes == (_RWSTD_SIZE_T)(-1)) {
+        res = _STD::codecvt_base::error;
+            break;
         }
-
-        // adjust "next" pointers
-        from_next = psrc;
-        to_next   = pdst;
-
+ 
+        // -2 indicates an ambiguous but valid subsequence
+        // (i.e., ok)
+        if (nbytes == (_RWSTD_SIZE_T)(-2))
+            break;
+
+        // 0 indicates the NUL character (skip over it)
+        if (nbytes == 0) 
+            ++nbytes;
+
+        // > 0 indicates the number of bytes in the successfully
+        // converted multibyte character
+        psrc    += nbytes;
+        src_len -= nbytes;
+        ++pdst;
+        --dst_size;
     }
-    else {
-        // the conversion succeeded on the first attempt
-
-        if (psrc)
-            from_next = psrc;
-        else {
-
-            // mbsrtowcs() sets `psrc' to 0 if the conversions
-            // stops due to the terminating NUL character
 
-            const _RWSTD_SIZE_T tmp =
-                __rw_libc_mbrlen (save_state, from_next, ret);
-            
-            from_next += tmp;
-        }
-
-        to_next += ret;
-    }
+    // adjust "next" pointers
+    from_next = psrc;
+    to_next   = pdst;
 
     // if the conversion has exhausted all space in the destination
     // range AND there are more COMPLETE characters in the source
     // range then we have a "partial" conversion
-    if (res == _STD::codecvt_base::ok && src_len && !dst_len) {
+    if (res == _STD::codecvt_base::ok && src_len && !dst_size) {
         _RWSTD_MBSTATE_T tmp_state = state;
         _RWSTD_SIZE_T tmp = __rw_libc_mbrlen (tmp_state, psrc, src_len);
         if (tmp < (_RWSTD_SIZE_T)(-2))