You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stdcxx.apache.org by se...@apache.org on 2006/06/10 00:16:26 UTC
svn commit: r413181 - /incubator/stdcxx/trunk/tests/iostream/27.filebuf.codecvt.cpp

Author: sebor
Date: Fri Jun  9 15:16:26 2006
New Revision: 413181

URL: http://svn.apache.org/viewvc?rev=413181&view=rev
Log:
2006-06-09  Martin Sebor  <se...@roguewave.com>

	STDCXX-4
	* 27.filebuf.codecvt.cpp: New test exercising lib.filebuf with
	a user-defined codecvt facet.

Added:
    incubator/stdcxx/trunk/tests/iostream/27.filebuf.codecvt.cpp   (with props)

Added: incubator/stdcxx/trunk/tests/iostream/27.filebuf.codecvt.cpp
URL: http://svn.apache.org/viewvc/incubator/stdcxx/trunk/tests/iostream/27.filebuf.codecvt.cpp?rev=413181&view=auto
==============================================================================
--- incubator/stdcxx/trunk/tests/iostream/27.filebuf.codecvt.cpp (added)
+++ incubator/stdcxx/trunk/tests/iostream/27.filebuf.codecvt.cpp Fri Jun  9 15:16:26 2006
@@ -0,0 +1,1174 @@
+/************************************************************************
+ *
+ * codecvt.cpp - test exercising file streams and code conversion
+ *
+ * $Id$
+ *
+ ************************************************************************
+ *
+ * Copyright 2006 The Apache Software Foundation or its licensors,
+ * as applicable.
+ *
+ * Copyright 2001-2006 Rogue Wave Software.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * 
+ **************************************************************************/
+
+// PROBLEM DESCRIPTION:
+//   basic_ofstream<>::overflow() mangles a multibyte sequence when using
+//   code conversion. This functionality of the class isn't currently (as
+//   of the date of the creation of the test) being exercised by our
+//   testsuite due to the lack of a suitable locale (e.g., ja_JP).
+
+// TEST DESCRIPTION:
+//   test creates a temporary file and fills it with series of pairs
+//   <offset, seq>, where `offset' is the offset from the beginning of
+//   the file to the first (decimal) digit of offset, and `seq' is
+//   a character in the range [1, CHAR_MAX] possibly converted to an
+//   escape or trigraph sequence according to the rules described
+//   in 2.3 and 2.13.2
+//
+//   the program then exercises the stream's (actually, the stream
+//   file buffer's) ability to extract and seek within such a file
+//   thus testing the stream's ability to crrectly interact with
+//   the codecvt facet installed in the imbued locale
+
+
+#include <rw/_defs.h> 
+#if defined (__IBMCPP__) && !defined (_RWSTD_NO_IMPLICIT_INCLUSION)
+   // disable implicit inclusion to work around a limitation
+   // in IBM VisualAge
+#  define _RWSTD_NO_IMPLICIT_INCLUSION 
+#endif 
+ 
+
+#include <fstream>
+#include <sstream>
+
+#include <climits>   // for UCHAR_MAX
+#include <cstdio>    // for fclose(), fopen(), fseek(), size_t
+#include <cstdlib>   // for abort()
+#include <cstring>   // for memset(), strcmp(), strlen()
+#include <cwchar>    // for mbstate_t
+
+#include <driver.h>
+#include <file.h>
+
+/**************************************************************************/
+
+// code conversion facet suitable for replacement of the default
+// codecvt<char, char, mbstate_t> facet
+// cformat::do_out() converts printable ASCII characters into themselves,
+// control characters are converted to standard C escape sequences
+// cformat::do_in() reverses the effect of do_out()
+class cformat: public std::codecvt <char, char, std::mbstate_t>
+{
+    enum {
+        new_line         = 0x0001,  // convert '\n' to "\n"
+        horizontal_tab   = 0x0002,  // convert '\t' to "\t"
+        vertical_tab     = 0x0004,  // convert '\v' to "\v"
+        backspace        = 0x0008,  // convert '\b' to "\b"
+        carriage_return  = 0x0010,  // convert '\r' to "\r"
+        form_feed        = 0x0020,  // convert '\f' to "\f"
+        alert            = 0x0040,  // convert '\a' to "\a"
+        backslash        = 0x0080,  // convert '\\' to "\\"
+        question_mark    = 0x0100,  // convert '?' to "\?"
+        single_quote     = 0x0200,  // convert '\\'' to "\'"
+        double_quote     = 0x0400,  // convert '"' to "\""
+        trigraphs        = 0x8000,  // convert to/from trigrap sequences
+        hex              = 0x1000   // hex notation in external representation
+    };
+
+    const int mask;   // bitmaps of flags above
+
+public:
+    explicit
+    cformat (std::size_t ref = 0, int m = 0)
+        : std::codecvt<char, char, std::mbstate_t> (ref),
+          mask (m) { /* empty */ }
+
+protected:
+
+    virtual result
+    do_out (state_type&, 
+            const intern_type*, const intern_type*, 
+            const intern_type*&, 
+            extern_type*, extern_type*, extern_type*&) const;
+
+    virtual result
+    do_in (state_type&, const extern_type*, 
+           const extern_type*, const extern_type*&, 
+           intern_type*, intern_type*, intern_type*&) const;
+
+    virtual result
+    do_unshift (state_type&, extern_type*, 
+                extern_type*, extern_type*&) const {
+        // stateless encoding, no conversion necessary
+        return noconv;
+    }
+
+    virtual int
+    do_encoding () const _THROWS (()) {
+        return 0;   // variable number of external chars per single internal
+    }
+
+    virtual bool
+    do_always_noconv () const _THROWS (()) {
+        return false;   // conversion always necessary
+    }
+
+    // returns the maximum `N' of extern chars in the range [from, from_end)
+    // such that N represents max or fewer internal chars
+    virtual int
+    do_length (state_type&, const extern_type*, 
+               const extern_type*, std::size_t) const;
+
+    // returns the max value do_length (s, from, from_end, 1) can return
+    // for any valid range [from, from_end) - see LWG issue 74 (a DR)
+    virtual int
+    do_max_length () const _THROWS (()) {
+        // assume that an internal char occupies at most 4 external chars
+        // this won't hold for e.g. '\x00001' etc., but will hold for all
+        // chars in the hex notation of up to two digits and all chars in
+        // octal notation (which are required to fit in 4 by the standard)
+        return 4;
+    }
+};
+
+
+std::codecvt_base::result
+cformat::do_out (      state_type& /* unused */,
+                 const intern_type *from,
+                 const intern_type *from_end,
+                 const intern_type *&from_next, 
+                       extern_type *to,
+                       extern_type *to_end,
+                       extern_type *&to_next) const
+{
+    // assert 22.2.1.5.2, p1 preconditions
+    rw_assert (from <= from_end, __FILE__, __LINE__,
+               "codecvt::do_out (..., from = %#p, from + %d, %#p, "
+               "to = %#p, to + %d, %#p): from <= from_end",
+               from, from_end - from, from_next, to, to_end - to, to_next);
+
+    rw_assert (to <= to_end, __FILE__, __LINE__,
+               "codecvt::do_out (..., from = %#p, from + %d, %#p, "
+               "to = %#p, to + %d, %#p): to <= to_end",
+               from, from_end - from, from_next, to, to_end - to, to_next);
+
+    // assume no conversion will be performed
+    result res = noconv;
+
+    for (from_next = from, to_next = to; from_next != from_end; ++from_next) {
+
+        // out of space
+        if (to_next == to_end) {
+            res = partial;
+            break;
+        }
+
+        // convert to unsigned to make sure comparison works
+        unsigned char ch = *from_next;
+        extern_type esc  = extern_type ();
+
+        if (ch < ' ') {
+
+            // convert to a C escape sequence
+            switch (ch) {
+
+            case '\a':
+                if (!(mask & alert)) {
+                    ch  = 'a';
+                    esc = '\\';
+                }
+                break;
+
+            case '\b':
+                if (!(mask & backspace)) {
+                    ch  = 'b';
+                    esc = '\\';
+                }
+                break;
+
+            case '\t':
+                if (!(mask & horizontal_tab)) {
+                    ch  = 't';
+                    esc = '\\';
+                }    
+                break;
+
+            case '\n':
+                if (!(mask & new_line)) {
+                    ch  = 'n';
+                    esc = '\\';
+                }
+                break;
+
+            case '\v':
+                if (!(mask & vertical_tab)) {
+                    ch  = 'v';
+                    esc = '\\';
+                }
+                break;
+
+            case '\f':
+                if (!(mask & form_feed)) {
+                    ch  = 'f';
+                    esc = '\\';
+                }
+                break;
+
+            case '\r':
+                if (!(mask & carriage_return)) {
+                    ch = 'r';
+                    esc = '\\';
+                }
+                break;
+
+            case '\\':
+                if (!(mask & backslash)) {
+                    ch  = '\\';
+                    esc = '\\';
+                }
+                break;
+
+            default:
+                esc = '\\';
+                break;
+            }
+        }
+        else if (ch > '~') {
+            // convert to a C escape sequence (octal)
+            esc = '\\';
+        }
+        else {
+            // escape special characters
+            switch (ch) {
+            case '?':
+                if (!(mask & question_mark))
+                    esc = '\\';
+                break;
+
+            case '\'':
+                if (!(mask & single_quote))
+                    esc = '\\';
+                break;
+
+            case '"':
+                if (!(mask & double_quote))
+                    esc = '\\';
+                break;
+
+            case '\\':
+                if (!(mask & backslash))
+                    esc = '\\';
+                break;
+            }
+
+            if (!(mask & trigraphs)) {
+
+                // convert to a trigraph sequence
+                switch (ch) {
+                case '#':  ch  = '=';  esc = '?'; break;
+                case '\\': ch  = '/';  esc = '?'; break;
+                case '^':  ch  = '\''; esc = '?'; break;
+                case '[':  ch  = '(';  esc = '?'; break;
+                case ']':  ch  = ')';  esc = '?'; break;
+                case '|':  ch  = '!';  esc = '?'; break;
+                case '{':  ch  = '<';  esc = '?'; break;
+                case '}':  ch  = '>';  esc = '?'; break;
+                case '~':  ch  = '-';  esc = '?'; break;
+                }
+            }
+        }
+
+        // process `ch' and `esc'
+
+        if ('\\' == esc) {
+
+            // conversion was performed
+            res = ok;
+
+            if (ch < ' ' || ch > '~') {
+
+                // need room for an escape followed by three ocal digits
+                if (4 > to_end - to_next) {
+                    res = partial;
+                    break;
+                }
+
+                static const char digits[] = "0123456789abcdef";
+
+                // add an escape character
+                *to_next++ = esc;
+
+                if (mask & hex) {
+                    // add hex representation (exactly three chars)
+                    *to_next++ = 'x';
+                    *to_next++ = digits [(ch & 0xf0) >> 4];
+                    *to_next++ = digits [ch & 0xf];
+                }
+                else {
+                    // add octal representation (exactly three digits)
+                    *to_next++ = digits [(ch & (7 << 6)) >> 6];
+                    *to_next++ = digits [(ch & (7 << 3)) >> 3];
+                    *to_next++ = digits [ch & 7];
+                }
+            }
+            else {
+                // need room for an escape followed by a single char 
+                if (2 > to_end - to_next) {
+                    res = partial;
+                    break;
+                }
+
+                // add an escape char followed by the escaped char
+                *to_next++ = esc;
+                *to_next++ = ch;
+            }
+        }
+        else if ('?' == esc) {
+            // need room for a trigraph sequence
+            if (3 > to_end - to_next) {
+                res = partial;
+                break;
+            }
+
+            // conversion was performed
+            res = ok;
+
+            // add a trigraph sequence
+            *to_next++ = '?';
+            *to_next++ = '?';
+            *to_next++ = ch;
+        }
+        else {
+            // not escaped
+            *to_next++ = ch;
+        }
+    }
+
+    if (noconv == res) {
+        // 22.2.1.5.2, p2, Note: no conversion was necessary
+        from_next = from;
+        to_next   = to;
+    }
+
+    rw_fatal (from_next >= from && from_next <= from_end, 0, __LINE__,
+              "user-defined codecvt: internal inconsistency");
+
+    rw_fatal (to_next >= to && to_next <= to_end, 0, __LINE__,
+              "user-defined codecvt: internal inconsistency");
+
+    return res;
+}
+
+
+std::codecvt_base::result
+cformat::do_in (      state_type& /* unused */,
+                const extern_type *from,
+                const extern_type *from_end,
+                const extern_type *&from_next, 
+                      intern_type *to,
+                      intern_type *to_end,
+                      intern_type *&to_next) const
+{
+    // assert 22.2.1.5.2, p1 preconditions
+    rw_assert (from <= from_end, __FILE__, __LINE__,
+               "codecvt::do_in (..., from = %#p, from + %d, %#p, "
+               "to = %#p, to + %d, %#p): from <= from_end",
+               from, from_end - from, from_next, to, to_end - to, to_next);
+
+    rw_assert (to <= to_end, __FILE__, __LINE__,
+               "codecvt::do_in (..., from = %#p, from + %d, %#p, "
+               "to = %#p, to + %d, %#p) to <= to_end",
+               from, from_end - from, from_next, to, to_end - to, to_next);
+
+    result res = ok;
+
+    for (from_next = from, to_next = to; from_next != from_end; ++from_next) {
+
+        unsigned char ch = *from_next;
+        intern_type c    = intern_type ();
+
+        if ('\\' == ch) {
+
+            if (2 > from_end - from_next) {
+                // ok is the correct value to return in this case,
+                // but partial should be handled as well for robustness
+                res = (from_end - (extern_type*)0) % 2 ? ok : partial;
+                break;
+            }
+
+            ch = from_next [1];
+
+            if ('x' == ch) {
+                // interpret a hex escape sequence
+
+                // advance past '\x'
+                const extern_type *next = from_next + 2;
+
+                // parse hex digits until a non-hex digits is encountered
+                for (; ; ++next) {
+
+                    if (next == from_end) {
+                        // do not advance to the end since there may be 
+                        // more digits following it (e.g., '\x012' with
+                        // from_end pointing at '1' or '2')
+                        return partial;
+                    }
+
+                    ch = *next;
+                    if (ch >= '0' && ch <= '9')
+                        c = (c << 4) | (ch - '0');
+                    else if (ch >= 'a' && ch <= 'f')
+                        c = (c << 4) | (ch - 'a' + 10);
+                    else if (ch >= 'A' && ch <= 'F')
+                        c = (c << 4) | (ch - 'A' + 10);
+                    else if (next - from_next > 2)
+                        break;
+                    else {
+                        return error;   // non-hex digit immediately after '\x'
+                    }
+                }
+
+                // advance to the end of parsed number
+                from_next = next - 1;
+            }
+            else if ('0' <= ch && '7' >= ch) {
+                // interpret a oct escape sequence
+
+                // (tentatively) advance past '\'
+                const extern_type *next = from_next + 1;
+
+                // parse at most three oct digits
+                for (; next - from_next < 4; ++next) {
+
+                    if (next == from_end) {
+                        // do not advance to the end since there may be 
+                        // more digits following it (e.g., '\x012' with
+                        // from_end pointing at '1' or '2')
+                        return partial;
+                    }
+
+                    ch = *next;
+                    if (ch >= '0' && ch <= '7')
+                        c = (c << 3) | (ch - '0');
+                    else if (next - from_next)
+                        break;
+                    else {
+                        // advance to the offending char
+                        from_next = next;
+                        return error;   // non-oct digit immediately after '\'
+                    }
+                }
+
+                // advance to the end of parsed number
+                from_next = next - 1;
+            }
+            else {
+                // interpret standard C escape sequence
+                switch (ch) {
+                case 'a': c = '\a'; break;
+                case 'b': c = '\b'; break;
+                case 't': c = '\t'; break;
+                case 'n': c = '\n'; break;
+                case 'v': c = '\v'; break;
+                case 'f': c = '\f'; break;
+                case 'r': c = '\r'; break;
+
+                // optional but allowed and escaped backslash
+                case '?': case '"': case '\'': case '\\': c = ch ; break;
+
+                // bad escape sequence                    
+                default: return error;
+                }
+
+                // advance past the initial '\'
+                ++from_next;
+            }
+        }
+        else if ('?' == ch && !(mask & trigraphs)) {
+            // (try to) convert a trigraph sequence
+            if (   2 > from_end - from_next
+                || '?' == from_next [1] && 3 > from_end - from_next) {
+                res = partial;
+                break;
+            }
+
+            if ('?' == from_next [1]) {
+
+                // "??" (potentilly) introduces a trigraph sequence
+                switch (from_next [2]) {
+
+                case '=':  c = '#';  break;
+                case '/':  c = '\\'; break;
+                case '\'': c = '^';  break;
+                case '(':  c = '[';  break;
+                case ')':  c = ']';  break;
+                case '!':  c = '|';  break;
+                case '<':  c = '{';  break;
+                case '>':  c = '}';  break;
+                case '-':  c = '~';  break;
+
+                default:
+                    // no a trigraph sequence, won't convert
+                    c = from_next [0];   // i.e., '?'
+                }
+
+                // skip the leading "??" of a trigraph sequence
+                if (c != from_next [0])
+                    from_next += 2;
+            }
+            else
+                // ordinary (not escaped) character
+                c = ch;
+        }
+        else
+            // ordinary (not escaped) character
+            c = ch;
+
+        // to_next may be 0 (when called from do_length())
+        // doing pointer math on invalid pointers (null) has undefined behavior
+        // but will probably work in most cases
+        if (to_next)
+            *to_next = c;
+
+        ++to_next;
+
+        // in case of of the inner loops has reached end
+        if (from_next == from_end)
+            break;
+    }
+    
+    rw_fatal (from_next >= from && from_next <= from_end, 0, __LINE__,
+              "user-defined codecvt: internal inconsistency");
+
+    rw_fatal (to_next >= to && (to_next <= to_end || !to_end), 0, __LINE__,
+              "user-defined codecvt: internal inconsistency");
+
+    return res;
+}
+
+
+int
+cformat::do_length (state_type&,
+                    const extern_type *from, 
+                    const extern_type *from_end,
+                    std::size_t        max) const
+{
+    const extern_type *from_next;
+    intern_type *to_next = 0;
+
+    std::mbstate_t st;
+    std::memset (&st, 0, sizeof st);
+
+    // use do_in() with `to' of 0 to do the computation
+    // doing pointer math on invalid pointers (null) has undefined behavior
+    // but will probably work in most cases
+    do_in (st, from, from_end, from_next,
+           to_next, to_next + max, to_next);
+
+    return to_next - (intern_type*)0;
+}
+
+/***********************************************************************/
+
+// determines file size in bytes
+static std::streamsize
+fsize (const char *fname)
+{
+    std::FILE* const f = std::fopen (fname, "r");
+    if (!f || std::fseek (f, 0, SEEK_END))
+        return -1;
+
+    const std::streamsize size = std::ftell (f);
+
+    std::fclose (f);
+
+    return size;
+}
+
+/***********************************************************************/
+
+static void
+self_test ()
+{
+    rw_info (0,  __FILE__, __LINE__,
+             "user-defined codecvt facet -- self test");
+
+    static const char* const result[] = {
+        "ok", "partial", "error", "noconv"
+    };
+
+    // user-defined code conversion facet
+    cformat fmt (1);
+
+    // original array of internal characters and one to which to convert
+    // an external representation back to (for comparison)
+    cformat::intern_type intrn [2][256] = { { '\0' } };
+
+    // array of external chars large enough to hold the internal array
+    // each internal char converts to at most 4 external chars
+    cformat::extern_type extrn [1024] = { '\0' };
+
+    // fill internal array with chars from '\1' to '\377'
+    for (std::size_t i = 0; i != sizeof intrn [0] - 1; ++i)
+        intrn [0][i] = cformat::intern_type (i + 1);
+
+    const cformat::intern_type *intrn_next_0 = 0;
+          cformat::intern_type *intrn_next_1 = 0;
+
+          cformat::extern_type *extrn_next   = 0;
+    
+
+    // dummy (state not used, conversion is stateless)
+    std::mbstate_t st;
+    std::memset (&st, 0, sizeof st);
+
+    // convert internal to external representation, substituting
+    // escape sequences for non-printable characters
+    std::codecvt_base::result res;
+
+    // convert array in internal representation to external representation
+    res = fmt.out (st,
+                   intrn [0], intrn [0] + sizeof intrn [0], intrn_next_0,
+                   extrn, extrn + sizeof extrn, extrn_next);
+
+    rw_assert (std::codecvt_base::ok == res, 0, __LINE__,
+               "codecvt::out (); result == codecvt_base::ok, "
+               "got codecvt_base::%s", result [res]);
+
+    // assert that the external sequence is longer than the internal one
+    rw_assert (extrn_next - extrn > intrn_next_0 - intrn [0], 0, __LINE__,
+               "codecvt::out (); converted size %d, expected > %d",
+               extrn_next - extrn, intrn_next_0 - intrn [0]);
+
+    // convert external to internal representation, parsing
+    // multi-char escape sequences into single chars
+
+    const cformat::intern_type *next = extrn;
+
+    intrn_next_1 = intrn [1];
+
+    for (; next != extrn_next; ) {
+
+        // allow only a small buffer space to exercise partial conversion
+        std::size_t step = std::size_t (extrn_next - next);
+        if (step > 12)
+            step = 5 + step % 8;
+
+        res = fmt.in (st,
+                      next, next + step, next,
+                      intrn_next_1, intrn [1] + sizeof intrn [1], intrn_next_1);
+
+        if (std::codecvt_base::error == res)
+            break;
+    }
+
+    // assert that entrire sequence converted ok
+    rw_assert (std::codecvt_base::ok == res, 0, __LINE__,
+               "codecvt::in (); result == codecvt_base::ok, got "
+               "codecvt_base::%s at offset %d", result [res], next - extrn);
+
+    rw_assert (intrn_next_1 == intrn [1] + sizeof intrn [1], 0, __LINE__,
+               "codecvt::in (); to_next == %#p, got %#p",
+               intrn [1] + sizeof intrn [1], intrn_next_1);
+
+    rw_assert (0 == std::strcmp (intrn [0], intrn [1]), 0, __LINE__,
+               "codecvt<>::out/in ()");
+}
+
+/***********************************************************************/
+
+#define RW_ASSERT_STATE(strm, state)                            \
+        rw_assert ((strm).rdstate () == (state), 0, __LINE__,   \
+                   "rdstate () == %{Is}, got %{Is}",            \
+                   (state), (strm).rdstate ())
+
+static void
+test_noconv (const char *fname)
+{
+    rw_info (0, 0, __LINE__, "ifstream extraction without conversion");
+
+    std::ifstream f (fname);
+
+    // make sure file stream has been successfully opened
+    RW_ASSERT_STATE (f, std::ios::goodbit);
+
+    // gain public access to protected members
+    struct pubbuf: std::streambuf {
+
+        // working around an MSVC 6.0 bug (PR #26330)
+        typedef std::streambuf Base;
+
+        virtual std::streamsize showmanyc () {
+            return Base::showmanyc ();
+        }
+        char* pubgptr () {
+            return Base::gptr ();
+        }
+        char* pubegptr () {
+            return Base::egptr ();
+        }
+    };
+
+    // use static cast through void* to avoid using reinterpret_cast
+    pubbuf *rdbuf = _RWSTD_STATIC_CAST (pubbuf*, (void*)f.rdbuf ());
+
+    std::streamsize filesize = rdbuf->showmanyc ();
+
+    // exercise 27.5.2.4.3, p1: showmanyc() returns the "estimated"
+    // size of the sequence (i.e., the file size in this case)
+    rw_assert (filesize == fsize (fname), 0, __LINE__,
+               "streambuf::showmanyc () == %ld, got %ld",
+               fsize (fname), filesize);
+    
+    // exercise 27.5.2.2.3, p1
+    filesize = f.rdbuf ()->in_avail ();
+
+    const char *gptr  = rdbuf->pubgptr ();
+    const char *egptr = rdbuf->pubgptr ();
+
+    rw_assert (filesize == (gptr < egptr ? egptr - gptr : fsize (fname)),
+               0, __LINE__,
+               "streambuf::in_avail () == %ld, got %ld",
+               (gptr < egptr ? egptr - gptr : fsize (fname)), filesize);
+
+    // allocate buffer large enough to accomodate the converted
+    // (i.e. internal) sequence
+    const std::size_t bufsize = 0x10000;     // 64k should do it
+    char *tmpbuf = new char [bufsize];
+
+    // fill with non-0 value to check for writes past the end
+    // (see also Onyx incident 14033)
+    std::memset (tmpbuf, '\x7f', bufsize);
+
+    // ecercise putback area
+    std::streamsize i;
+
+#ifndef _RWSTD_PBACK_SIZE
+#  define _RWSTD_PBACK_SIZE 1
+#endif   // _RWSTD_PBACK_SIZE
+
+    // _RWSTD_PBACK_SIZE is the size of the putback area the library
+    // was configured with; the macro expands to streamsize (N)
+    for (i = 0; i != _RWSTD_PBACK_SIZE + 1; ++i) {
+        // read a few characters, read must not append a '\0'
+        std::streamsize n = f.read (tmpbuf, i).gcount ();
+
+        // assert that read exactly `i' chars, buffer not null-terminared
+        rw_assert (i == n && '\x7f' == tmpbuf [i], 0, __LINE__,
+                   "ifstream::read (%#p, %ld) read %ld,"
+                   "buffer terminated with '\\%03o'",
+                   tmpbuf, i, n, tmpbuf [i]);
+
+        // put back read characters; assert that they are the same
+        // as those in the corresponding positions in the buffer
+        for (std::streamsize j = 0; j != i; ++j) {
+            std::ifstream::int_type c = f.rdbuf ()->sungetc ();
+
+            typedef std::ifstream::traits_type Traits;
+
+            rw_assert (Traits::to_int_type (tmpbuf [n - j - 1]) == c,
+                       0, __LINE__,
+                       "filebuf::sungetc() == '\\%03o', got '\\%03o",
+                       tmpbuf [n - j - 1], c);
+        }
+
+        // re-read characters just put back
+        char buf [_RWSTD_PBACK_SIZE + 1];
+        std::memset (buf, '\x7f', sizeof buf);
+
+        std::streamsize n2 = f.read (buf, i).gcount ();
+
+        // assert that the requested number of chars were read in
+        rw_assert (i == n2 && '\x7f' == buf [i], 0, __LINE__,
+                   "ifstream::read (%#p, %ld) read %ld,"
+                   "buffer terminated with '\\%03o'",
+                   buf, i, n2, buf [i]);
+
+        // assert that the read chars are those that were put back
+        for (std::streamsize k = 0; k != i; ++k) {
+            rw_assert (buf [k] == tmpbuf [k], 0, __LINE__,
+                       "buffer mismatch at offset %ld: got '\\%03o', "
+                       "expected '\\%03o'", k, buf [k], tmpbuf [k]);
+            // put character back again so that it can be read back in
+            f.rdbuf ()->sungetc ();
+        }
+    }
+
+    // read file contents into buffer (apply no conversion)
+    const std::streamsize n = f.read (tmpbuf, bufsize).gcount ();
+
+    // 27.6.1.3, p28 - read() sets eofbit | failbit
+    // if end-of-file occurs on the input sequence
+    RW_ASSERT_STATE (f, std::ios::eofbit | std::ios::failbit);
+
+    // assert that the entiire file has been read
+    rw_assert (n == filesize && '\x7f' == tmpbuf [n], 0, __LINE__,
+               "ifstream::read (%#p, %ld) read %ld, expected %ld; "
+               "buffer terminated with '\\%03o' [%s]",
+               tmpbuf, bufsize, n, filesize, tmpbuf [n], fname);
+
+    tmpbuf [n] = '\0';
+
+    // assert that file contains no control characters
+    bool b = true;
+    for (i = 0; b && i != UCHAR_MAX + 1; ++i) {
+        if (i >= ' ' || i <= '~')
+            continue;
+        b = 0 == std::strchr (tmpbuf, char (i));
+    }
+
+    rw_assert (b, 0, __LINE__,
+               "unescaped non-printable character '\\#03o' at offset %ld",
+                tmpbuf [i], i);
+
+    delete[] tmpbuf;
+}
+
+/***********************************************************************/
+
+static void
+test_error (const char *fname)
+{
+    rw_info (0, 0, __LINE__, "ifstream extraction with a conversion error");
+
+    const char outbuf[] = {
+        "abcdefghijklmnopqrstuvwxyz\\x20\\xzzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+        //                                 ^
+        //                                 |
+        // error (invalid hex sequence) ---+
+    };
+
+    // write out a text file containing a conversion error
+    std::ofstream ostrm (fname);
+
+    ostrm << outbuf;
+
+    ostrm.close ();
+
+    // read the file back in using the conversion facet
+    std::ifstream istrm (fname);
+
+    // user-defined code conversion facet
+    const cformat fmt (1 /* prevent locale from deleting */);
+
+    // create a locale by combining the classic locale and our UD facet
+    // cformat; the facet will replace std::codecvt<char, char, mbstate_t>
+    std::locale l (std::locale::classic (), &fmt);
+
+    // imbue locale with formatting facet into streams and save previous
+    l = istrm.imbue (l);
+
+    char inbuf [sizeof outbuf * 4] = { 0 };
+
+    // try to read partial contents of the file
+    // including the conversion error into the buffer
+    istrm.read (inbuf, 26L + 4L /* "a..z" <space> <error> "AB" */);
+
+    // verify that the operation failed, eofbit is set since
+    // less than the requested number of characters have been read
+    RW_ASSERT_STATE (istrm, std::ios::eofbit | std::ios::failbit);
+
+    // verify that the 26 characters 'a' through 'z' plus
+    // the space (i.e., 27 chars) have been extracted
+    rw_assert (27 == istrm.gcount (), 0, __LINE__,
+               "ifstream::read () extracted %d, expected 27",
+               istrm.gcount ());
+
+    rw_assert (   0 == std::ifstream::traits_type::compare (inbuf, outbuf, 26)
+               && ' ' == inbuf [26], 0, __LINE__,
+               "ifstream::read () got \"%s\", expected \"%.26s \"",
+               inbuf, outbuf);
+
+    istrm.clear ();
+
+    // try to read again, and verify that the operation fails
+    istrm.read (inbuf, sizeof inbuf);
+
+    RW_ASSERT_STATE (istrm, std::ios::eofbit | std::ios::failbit);
+
+    rw_assert (0 == istrm.gcount (), 0, __LINE__,
+               "ifstream::read () extracted %d, expected 0",
+               istrm.gcount ());
+
+    // replace the imbued locale
+    istrm.imbue (l);
+}
+
+/***********************************************************************/
+
+static void
+test_seek (const char *fname,
+           std::size_t bufsize = std::size_t (-1))
+{
+    std::ifstream f (fname);
+
+    // make sure stream has been successfully opened
+    RW_ASSERT_STATE (f, std::ios::goodbit);
+
+    // set buffer size if specified
+    if (std::size_t (-1) != bufsize) {
+
+        rw_info (0, 0, __LINE__,
+                 "ifstream::seekg()/tellg() - %zu byte buffer", bufsize);
+
+        RW_ASSERT_STATE (f, std::ios::goodbit);
+        f.rdbuf ()->pubsetbuf (0, bufsize);
+    }
+    else {
+        rw_info (0, 0, __LINE__,
+                 "ifstream::seekg()/tellg() - default buffer size");
+    }
+
+    // user-defined code conversion facet
+    const cformat fmt (1 /* prevent locale from deleting */);
+
+    // create a locale by combining the classic locale and our UD facet
+    // cformat; the facet will replace std::codecvt<char, char, mbstate_t>
+    std::locale l (std::locale::classic (), &fmt);
+
+    // imbue locale with formatting facet into streams and save previous
+    l = f.imbue (l);
+
+    // seek to the beginning of stream (safe)
+    f.seekg (0);
+    RW_ASSERT_STATE (f, std::ios::goodbit);
+
+    const unsigned char max = UCHAR_MAX - '~';
+
+    for (std::size_t n = 0; n != std::size_t (max - 1); ++n) {
+
+        const char delim = char ('~' + n);
+
+        // skip over chars until the terminating delim (and extract it)
+        f.ignore (0x10000, std::fstream::traits_type::to_int_type (delim));
+        rw_assert (f.good (), 0, __LINE__,
+                   "istream::ignore (0x10000, '\\%03o'); "
+                   "rdstate() = %{Is}, gcount() = %ld",
+                   delim, f.rdstate (), f.gcount ());
+
+        // alternate between exercising seekg() and pubsync()
+        if (n % 2) {
+            // seek inplace (offset must be 0 for MB encodings)
+            f.seekg (0, std::ios::cur);
+            RW_ASSERT_STATE (f, std::ios::goodbit);
+        }
+        else {
+            // filebuf::pubsync() must return 0
+            int syn = f.rdbuf ()->pubsync ();
+            rw_assert (0 == syn, 0, __LINE__,
+                       "filebuf::pubsync () == 0, got %d", syn);
+        }
+
+        // skip exactly one char forward (retrieve a space)
+        char c = char ();
+        f.get (c);
+        RW_ASSERT_STATE (f, std::ios::goodbit);
+        rw_assert (' ' == c, 0, __LINE__,
+                   "istream::get(char_type) got '\\%03o', expected ' '", c);
+
+        // get current file position
+        const std::ifstream::pos_type pos = f.tellg ();
+        RW_ASSERT_STATE (f, std::ios::goodbit);
+
+        // extract offset - should be the same as pos
+        long offset = 0;
+        f >> offset;
+
+        RW_ASSERT_STATE (f, std::ios::goodbit);
+
+        rw_assert (long (pos) == offset, 0, __LINE__,
+                   "ifstream::operator>>() expected %ld, got %ld",
+                   long (pos), offset);
+
+        // in_avail() must return a value > 0
+        std::streamsize avail = f.rdbuf ()->in_avail ();
+        rw_assert (avail > 0, 0, __LINE__,
+                   "filebuf::in_avail() expected > 0, got %ld", avail);
+
+        // "rewind" stream to the beginning
+        f.seekg (0);
+        RW_ASSERT_STATE (f, std::ios::goodbit);
+
+        // try seeking to the previous position
+        f.seekg (pos);
+        RW_ASSERT_STATE (f, std::ios::goodbit);
+        rw_assert (f.tellg () == pos, 0, __LINE__,
+                   "istream::seekg (%ld); tellg () returns %ld",
+                   long (pos), long (f.tellg ()));
+
+        // re-read offset - should be the same as file pos
+        f >> offset;
+
+        RW_ASSERT_STATE (f, std::ios::goodbit);
+
+        rw_assert (long (pos) == offset, 0, __LINE__,
+                   "ifstream::operator>>() expected %ld, got %ld",
+                   long (pos), offset);
+    }
+
+    // ignore the rest of file, eofbit must be set
+    f.ignore (0x10000);
+    RW_ASSERT_STATE (f, std::ios::eofbit);
+
+    // in_avail() must return 0
+    const std::streamsize avail = f.rdbuf ()->in_avail ();
+    rw_assert (0 == avail, 0, __LINE__,
+               "filebuf::in_avail() expected 0, got %ld", avail);
+
+    // imbue original locale (currently imbued locale
+    // will be destroyed prior to the destruction of `f')
+    f.imbue (l);
+}
+
+/***********************************************************************/
+
+static int
+run_test (int, char*[])
+{
+    // self-test make sure facet works
+    self_test ();
+
+    // user-defined code conversion facet
+    const cformat fmt (1 /* prevent locale from deleting */);
+
+    // create a locale by combining the classic locale and our UD facet
+    // cformat; the facet will replace std::codecvt<char, char, mbstate_t>
+    std::locale l (std::locale::classic (), &fmt);
+
+    const char *fname = rw_tmpnam (0);
+
+    if (!fname)
+        return 1;
+
+    // will be populated with file offsets and escape sequences
+    char buffer [4096] = { '\0' };
+
+    int buflen = 0;
+
+    // generate file contents using UD conversion
+    if (1) {
+        rw_info (0, 0, __LINE__,
+                 "ofstream insertion with multibyte conversion");
+
+        std::ofstream f (fname);
+
+        // make sure file stream has been successfully opened
+        RW_ASSERT_STATE (f, std::ios::goodbit);
+
+        // imbue locale with formatting facet into stream
+        f.imbue (l);
+
+        for (std::size_t i = 1; i != UCHAR_MAX + 1U; ++i) {
+
+            const std::ofstream::pos_type pos = f.tellp ();
+
+            RW_ASSERT_STATE (f, std::ios::goodbit);
+
+            buflen = std::strlen (buffer);
+
+            // append the file offset followed by a (possibly escaped) char
+            std::sprintf (buffer + buflen, "%ld %c ", long (pos), char (i));
+
+            // write out the just appended portion of the buffer
+            f << (buffer + buflen);
+
+            RW_ASSERT_STATE (f, std::ios::goodbit);
+        }
+
+        buflen = std::strlen (buffer);
+
+        // file contains the contents of buffer with non-printable
+        // chars replaced with escape sequences (e.g., tabs with '\t', etc.)
+    }
+
+
+    // read contents of file w/o conversion
+    test_noconv (fname);
+
+    // read contents of file, apply conversion
+    if (1) {
+        rw_info (0, 0, __LINE__,
+                 "ifstream extraction with multibyte conversion");
+
+        std::ifstream f (fname);
+
+        // make sure file stream has been successfully opened
+        RW_ASSERT_STATE (f, std::ios::goodbit);
+
+        // imbue locale with formatting facet into stream
+        f.imbue (l);
+
+        // allocate buffer large enough to accomodate the converted
+        // (i.e. internal) sequence
+        char tmpbuf [sizeof buffer];
+
+        // read file contents into buffer, convert escape sequences
+        // into the corresponding (perhaps unprintable) characters
+        const std::streamsize n = f.read (tmpbuf, sizeof tmpbuf).gcount ();
+
+        // 27.6.1.3, p28 - read() sets eofbit | failbit
+        // if end-of-file occurs on the input sequence
+        RW_ASSERT_STATE (f, std::ios::eofbit | std::ios::failbit);
+
+        rw_assert (long (n) == buflen, 0, __LINE__,
+                   "ifstream::read (%#p, %d); read %ld, expected %d",
+                   tmpbuf, sizeof tmpbuf, long (n), buflen);
+
+        // assert that converted file contents are the same
+        // as the originally generated buffer
+        const long len = long (n) < buflen ? long (n) : buflen;
+        for (long i = 0; i != len; ++i) {
+            if (tmpbuf [i] != buffer [i]) {
+                rw_assert (0, 0, __LINE__,
+                           "'\\%03o' == '\\%03o'; offset %d",
+                           (unsigned char)buffer [i],
+                           (unsigned char)tmpbuf [i], i);
+                break;
+            }
+        }
+    }
+
+
+    // test with default buffer
+    test_seek (fname);
+
+    // retest with buffer of user-defined size
+    for (std::size_t n = 4096; n != std::size_t (-1);
+         n -= 1024 < n ? 1024 : 256 < n ? 256 : 16 < n ? 16 : 1)
+        test_seek (fname, n);
+
+
+    // test with errors during conversion
+    test_error (fname);
+
+    // remove a temporary file
+    std::remove (fname);
+
+    return 0;
+}
+
+/**************************************************************************/
+
+int main (int argc, char *argv[])
+{
+    return rw_test (argc, argv, __FILE__,
+                    "lib.filebuf",
+                    0 /* no comment */,
+                    run_test,
+                    "", 0);
+}

Propchange: incubator/stdcxx/trunk/tests/iostream/27.filebuf.codecvt.cpp
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/stdcxx/trunk/tests/iostream/27.filebuf.codecvt.cpp
------------------------------------------------------------------------------
    svn:keywords = Id