You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stdcxx.apache.org by se...@apache.org on 2006/06/10 00:16:26 UTC
svn commit: r413181 -
/incubator/stdcxx/trunk/tests/iostream/27.filebuf.codecvt.cpp
Author: sebor
Date: Fri Jun 9 15:16:26 2006
New Revision: 413181
URL: http://svn.apache.org/viewvc?rev=413181&view=rev
Log:
2006-06-09 Martin Sebor <se...@roguewave.com>
STDCXX-4
* 27.filebuf.codecvt.cpp: New test exercising lib.filebuf with
a user-defined codecvt facet.
Added:
incubator/stdcxx/trunk/tests/iostream/27.filebuf.codecvt.cpp (with props)
Added: incubator/stdcxx/trunk/tests/iostream/27.filebuf.codecvt.cpp
URL: http://svn.apache.org/viewvc/incubator/stdcxx/trunk/tests/iostream/27.filebuf.codecvt.cpp?rev=413181&view=auto
==============================================================================
--- incubator/stdcxx/trunk/tests/iostream/27.filebuf.codecvt.cpp (added)
+++ incubator/stdcxx/trunk/tests/iostream/27.filebuf.codecvt.cpp Fri Jun 9 15:16:26 2006
@@ -0,0 +1,1174 @@
+/************************************************************************
+ *
+ * 27.filebuf.codecvt.cpp - test exercising file streams and code conversion
+ *
+ * $Id$
+ *
+ ************************************************************************
+ *
+ * Copyright 2006 The Apache Software Foundation or its licensors,
+ * as applicable.
+ *
+ * Copyright 2001-2006 Rogue Wave Software.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ **************************************************************************/
+
+// PROBLEM DESCRIPTION:
+// basic_ofstream<>::overflow() mangles a multibyte sequence when using
+// code conversion. This functionality of the class isn't currently (as
+// of the date of the creation of the test) being exercised by our
+// testsuite due to the lack of a suitable locale (e.g., ja_JP).
+
+// TEST DESCRIPTION:
+// test creates a temporary file and fills it with a series of pairs
+// <offset, seq>, where `offset' is the offset from the beginning of
+// the file to the first (decimal) digit of offset, and `seq' is
+// a character in the range [1, CHAR_MAX] possibly converted to an
+// escape or trigraph sequence according to the rules described
+// in 2.3 and 2.13.2
+//
+// the program then exercises the stream's (actually, the stream
+// file buffer's) ability to extract and seek within such a file
+// thus testing the stream's ability to correctly interact with
+// the codecvt facet installed in the imbued locale
+
+
+#include <rw/_defs.h>
+#if defined (__IBMCPP__) && !defined (_RWSTD_NO_IMPLICIT_INCLUSION)
+ // disable implicit inclusion to work around a limitation
+ // in IBM VisualAge
+# define _RWSTD_NO_IMPLICIT_INCLUSION
+#endif
+
+
+#include <fstream>
+#include <sstream>
+
+#include <climits> // for UCHAR_MAX
+#include <cstdio> // for fclose(), fopen(), fseek(), size_t
+#include <cstdlib> // for abort()
+#include <cstring> // for memset(), strcmp(), strlen()
+#include <cwchar> // for mbstate_t
+
+#include <driver.h>
+#include <file.h>
+
+/**************************************************************************/
+
+// user-defined code conversion facet suitable as a replacement for the
+// default codecvt<char, char, mbstate_t> facet; by default (mask == 0)
+// cformat::do_out() copies most printable ASCII characters unchanged and
+// converts the rest to C escape or trigraph sequences (see flags below)
+// cformat::do_in() reverses the effect of do_out()
+class cformat: public std::codecvt <char, char, std::mbstate_t>
+{
+ enum {
+ new_line = 0x0001, // set: do not convert '\n' to "\n"
+ horizontal_tab = 0x0002, // set: do not convert '\t' to "\t"
+ vertical_tab = 0x0004, // set: do not convert '\v' to "\v"
+ backspace = 0x0008, // set: do not convert '\b' to "\b"
+ carriage_return = 0x0010, // set: do not convert '\r' to "\r"
+ form_feed = 0x0020, // set: do not convert '\f' to "\f"
+ alert = 0x0040, // set: do not convert '\a' to "\a"
+ backslash = 0x0080, // set: do not convert '\\' to "\\"
+ question_mark = 0x0100, // set: do not convert '?' to "\?"
+ single_quote = 0x0200, // set: do not convert '\'' to "\'"
+ double_quote = 0x0400, // set: do not convert '"' to "\""
+ trigraphs = 0x8000, // set: do not convert to/from trigraph sequences
+ hex = 0x1000 // set: hex (not octal) notation in external representation
+ };
+
+ const int mask; // bitmap of the flags above (0 enables all conversions)
+
+public:
+ explicit
+ cformat (std::size_t ref = 0, int m = 0) // m: bitmap of the flags above
+ : std::codecvt<char, char, std::mbstate_t> (ref),
+ mask (m) { /* empty */ }
+
+protected:
+
+ virtual result
+ do_out (state_type&,
+ const intern_type*, const intern_type*,
+ const intern_type*&,
+ extern_type*, extern_type*, extern_type*&) const;
+
+ virtual result
+ do_in (state_type&, const extern_type*,
+ const extern_type*, const extern_type*&,
+ intern_type*, intern_type*, intern_type*&) const;
+
+ virtual result
+ do_unshift (state_type&, extern_type*,
+ extern_type*, extern_type*&) const {
+ // stateless encoding, no conversion necessary
+ return noconv;
+ }
+
+ virtual int
+ do_encoding () const _THROWS (()) {
+ return 0; // variable number of external chars per single internal
+ }
+
+ virtual bool
+ do_always_noconv () const _THROWS (()) {
+ return false; // conversion always necessary
+ }
+
+ // returns the maximum `N' of extern chars in the range [from, from_end)
+ // such that N represents max or fewer internal chars
+ virtual int
+ do_length (state_type&, const extern_type*,
+ const extern_type*, std::size_t) const;
+
+ // returns the max value do_length (s, from, from_end, 1) can return
+ // for any valid range [from, from_end) - see LWG issue 74 (a DR)
+ virtual int
+ do_max_length () const _THROWS (()) {
+ // assume that an internal char occupies at most 4 external chars
+ // this won't hold for e.g. '\x00001' etc., but will hold for all
+ // chars in the hex notation of up to two digits and all chars in
+ // octal notation (which are required to fit in 4 by the standard)
+ return 4;
+ }
+};
+
+/* Converts internal chars in [from, from_end) to their external
+ * representation stored in [to, to_end).  With no flags set in mask,
+ * control chars and chars above '~' are written as C escape sequences,
+ * the chars ? ' " are preceded by a backslash, and chars that have
+ * a trigraph (including the backslash) are written as the trigraph.
+ * Returns partial when the destination runs out of room, noconv when
+ * no char needed converting (from_next and to_next are then reset to
+ * from and to), and ok otherwise.
+ */
+std::codecvt_base::result
+cformat::do_out ( state_type& /* unused */,
+ const intern_type *from,
+ const intern_type *from_end,
+ const intern_type *&from_next,
+ extern_type *to,
+ extern_type *to_end,
+ extern_type *&to_next) const
+{
+ // assert 22.2.1.5.2, p1 preconditions
+ rw_assert (from <= from_end, __FILE__, __LINE__,
+ "codecvt::do_out (..., from = %#p, from + %d, %#p, "
+ "to = %#p, to + %d, %#p): from <= from_end",
+ from, from_end - from, from_next, to, to_end - to, to_next);
+
+ rw_assert (to <= to_end, __FILE__, __LINE__,
+ "codecvt::do_out (..., from = %#p, from + %d, %#p, "
+ "to = %#p, to + %d, %#p): to <= to_end",
+ from, from_end - from, from_next, to, to_end - to, to_next);
+
+ // assume no conversion will be performed
+ result res = noconv;
+
+ for (from_next = from, to_next = to; from_next != from_end; ++from_next) {
+
+ // out of space
+ if (to_next == to_end) {
+ res = partial;
+ break;
+ }
+
+ // convert to unsigned to make sure comparison works
+ unsigned char ch = *from_next;
+ extern_type esc = extern_type (); // 0: copy as is, '\\': escape, '?': trigraph
+
+ if (ch < ' ') {
+
+ // convert to a C escape sequence
+ switch (ch) {
+
+ case '\a':
+ if (!(mask & alert)) {
+ ch = 'a';
+ esc = '\\';
+ }
+ break;
+
+ case '\b':
+ if (!(mask & backspace)) {
+ ch = 'b';
+ esc = '\\';
+ }
+ break;
+
+ case '\t':
+ if (!(mask & horizontal_tab)) {
+ ch = 't';
+ esc = '\\';
+ }
+ break;
+
+ case '\n':
+ if (!(mask & new_line)) {
+ ch = 'n';
+ esc = '\\';
+ }
+ break;
+
+ case '\v':
+ if (!(mask & vertical_tab)) {
+ ch = 'v';
+ esc = '\\';
+ }
+ break;
+
+ case '\f':
+ if (!(mask & form_feed)) {
+ ch = 'f';
+ esc = '\\';
+ }
+ break;
+
+ case '\r':
+ if (!(mask & carriage_return)) {
+ ch = 'r';
+ esc = '\\';
+ }
+ break;
+
+ case '\\': // NOTE(review): looks unreachable, '\\' is not < ' ' in ASCII
+ if (!(mask & backslash)) {
+ ch = '\\';
+ esc = '\\';
+ }
+ break;
+
+ default: // any other control char: numeric escape (ch is unchanged)
+ esc = '\\';
+ break;
+ }
+ }
+ else if (ch > '~') {
+ // convert to a C escape sequence (octal)
+ esc = '\\';
+ }
+ else {
+ // escape special characters
+ switch (ch) {
+ case '?':
+ if (!(mask & question_mark))
+ esc = '\\';
+ break;
+
+ case '\'':
+ if (!(mask & single_quote))
+ esc = '\\';
+ break;
+
+ case '"':
+ if (!(mask & double_quote))
+ esc = '\\';
+ break;
+
+ case '\\':
+ if (!(mask & backslash))
+ esc = '\\';
+ break;
+ }
+
+ if (!(mask & trigraphs)) {
+
+ // convert to a trigraph sequence (overrides the escape above)
+ switch (ch) {
+ case '#': ch = '='; esc = '?'; break;
+ case '\\': ch = '/'; esc = '?'; break;
+ case '^': ch = '\''; esc = '?'; break;
+ case '[': ch = '('; esc = '?'; break;
+ case ']': ch = ')'; esc = '?'; break;
+ case '|': ch = '!'; esc = '?'; break;
+ case '{': ch = '<'; esc = '?'; break;
+ case '}': ch = '>'; esc = '?'; break;
+ case '~': ch = '-'; esc = '?'; break;
+ }
+ }
+ }
+
+ // process `ch' and `esc'
+
+ if ('\\' == esc) {
+
+ // conversion was performed
+ res = ok;
+
+ if (ch < ' ' || ch > '~') {
+
+ // need room for an escape followed by three octal digits
+ if (4 > to_end - to_next) {
+ res = partial;
+ break;
+ }
+
+ static const char digits[] = "0123456789abcdef";
+
+ // add an escape character
+ *to_next++ = esc;
+
+ if (mask & hex) {
+ // add hex representation (exactly three chars)
+ *to_next++ = 'x';
+ *to_next++ = digits [(ch & 0xf0) >> 4];
+ *to_next++ = digits [ch & 0xf];
+ }
+ else {
+ // add octal representation (exactly three digits)
+ *to_next++ = digits [(ch & (7 << 6)) >> 6];
+ *to_next++ = digits [(ch & (7 << 3)) >> 3];
+ *to_next++ = digits [ch & 7];
+ }
+ }
+ else {
+ // need room for an escape followed by a single char
+ if (2 > to_end - to_next) {
+ res = partial;
+ break;
+ }
+
+ // add an escape char followed by the escaped char
+ *to_next++ = esc;
+ *to_next++ = ch;
+ }
+ }
+ else if ('?' == esc) {
+ // need room for a trigraph sequence
+ if (3 > to_end - to_next) {
+ res = partial;
+ break;
+ }
+
+ // conversion was performed
+ res = ok;
+
+ // add a trigraph sequence
+ *to_next++ = '?';
+ *to_next++ = '?';
+ *to_next++ = ch;
+ }
+ else {
+ // not escaped
+ *to_next++ = ch;
+ }
+ }
+
+ if (noconv == res) {
+ // 22.2.1.5.2, p2, Note: no conversion was necessary
+ from_next = from;
+ to_next = to;
+ }
+
+ rw_fatal (from_next >= from && from_next <= from_end, 0, __LINE__,
+ "user-defined codecvt: internal inconsistency");
+
+ rw_fatal (to_next >= to && to_next <= to_end, 0, __LINE__,
+ "user-defined codecvt: internal inconsistency");
+
+ return res;
+}
+
+
+// Converts the external sequence [from, from_end), as produced by
+// do_out(), back to internal chars stored in [to, to_end): parses hex
+// and octal escapes, the standard C escapes, and (unless disabled in
+// mask) trigraph sequences; all other chars are copied unchanged.
+//
+// On return from_next points at the first external char that was not
+// consumed and to_next one past the last internal char produced.
+// Returns ok on success, partial when the source ends in the middle
+// of a sequence or the destination is full, and error on a malformed
+// escape sequence.
+//
+// `to' may be null (see do_length()), in which case internal chars
+// are only counted and not stored.
+std::codecvt_base::result
+cformat::do_in (state_type& /* unused */,
+                const extern_type *from,
+                const extern_type *from_end,
+                const extern_type *&from_next,
+                intern_type *to,
+                intern_type *to_end,
+                intern_type *&to_next) const
+{
+    // assert 22.2.1.5.2, p1 preconditions
+    rw_assert (from <= from_end, __FILE__, __LINE__,
+               "codecvt::do_in (..., from = %#p, from + %d, %#p, "
+               "to = %#p, to + %d, %#p): from <= from_end",
+               from, from_end - from, from_next, to, to_end - to, to_next);
+
+    rw_assert (to <= to_end, __FILE__, __LINE__,
+               "codecvt::do_in (..., from = %#p, from + %d, %#p, "
+               "to = %#p, to + %d, %#p) to <= to_end",
+               from, from_end - from, from_next, to, to_end - to, to_next);
+
+    result res = ok;
+
+    for (from_next = from, to_next = to; from_next != from_end; ++from_next) {
+
+        // out of space in the destination sequence (same as do_out());
+        // without this check chars would be stored past to_end and
+        // do_length() would not be limited to `max' internal chars
+        if (to_next == to_end) {
+            res = partial;
+            break;
+        }
+
+        unsigned char ch = *from_next;
+        intern_type c = intern_type ();
+
+        if ('\\' == ch) {
+
+            if (2 > from_end - from_next) {
+                // ok is the correct value to return in this case,
+                // but partial should be handled as well for robustness
+                // (the parity of the address is used to alternate
+                // between the two results)
+                res = (from_end - (extern_type*)0) % 2 ? ok : partial;
+                break;
+            }
+
+            ch = from_next [1];
+
+            if ('x' == ch) {
+                // interpret a hex escape sequence
+
+                // advance past '\x'
+                const extern_type *next = from_next + 2;
+
+                // parse hex digits until a non-hex digit is encountered
+                for (; ; ++next) {
+
+                    if (next == from_end) {
+                        // do not advance to the end since there may be
+                        // more digits following it (e.g., '\x012' with
+                        // from_end pointing at '1' or '2')
+                        return partial;
+                    }
+
+                    ch = *next;
+                    if (ch >= '0' && ch <= '9')
+                        c = (c << 4) | (ch - '0');
+                    else if (ch >= 'a' && ch <= 'f')
+                        c = (c << 4) | (ch - 'a' + 10);
+                    else if (ch >= 'A' && ch <= 'F')
+                        c = (c << 4) | (ch - 'A' + 10);
+                    else if (next - from_next > 2)
+                        break;
+                    else {
+                        return error;   // non-hex digit immediately after '\x'
+                    }
+                }
+
+                // advance to the end of parsed number
+                from_next = next - 1;
+            }
+            else if ('0' <= ch && '7' >= ch) {
+                // interpret an oct escape sequence
+
+                // (tentatively) advance past '\'
+                const extern_type *next = from_next + 1;
+
+                // parse at most three oct digits
+                for (; next - from_next < 4; ++next) {
+
+                    if (next == from_end) {
+                        // do not advance to the end since there may be
+                        // more digits following it (e.g., '\012' with
+                        // from_end pointing at '1' or '2')
+                        return partial;
+                    }
+
+                    ch = *next;
+                    if (ch >= '0' && ch <= '7')
+                        c = (c << 3) | (ch - '0');
+                    else if (next - from_next)
+                        break;
+                    else {
+                        // advance to the offending char
+                        from_next = next;
+                        return error;   // non-oct digit immediately after '\'
+                    }
+                }
+
+                // advance to the end of parsed number
+                from_next = next - 1;
+            }
+            else {
+                // interpret standard C escape sequence
+                switch (ch) {
+                case 'a': c = '\a'; break;
+                case 'b': c = '\b'; break;
+                case 't': c = '\t'; break;
+                case 'n': c = '\n'; break;
+                case 'v': c = '\v'; break;
+                case 'f': c = '\f'; break;
+                case 'r': c = '\r'; break;
+
+                // optional but allowed and escaped backslash
+                case '?': case '"': case '\'': case '\\': c = ch ; break;
+
+                // bad escape sequence
+                default: return error;
+                }
+
+                // advance past the initial '\'
+                ++from_next;
+            }
+        }
+        else if ('?' == ch && !(mask & trigraphs)) {
+            // (try to) convert a trigraph sequence; a lone '?' at the end
+            // of the source, or "??" without a third char, is incomplete
+            if (   2 > from_end - from_next
+                || ('?' == from_next [1] && 3 > from_end - from_next)) {
+                res = partial;
+                break;
+            }
+
+            if ('?' == from_next [1]) {
+
+                // "??" (potentially) introduces a trigraph sequence
+                switch (from_next [2]) {
+
+                case '=': c = '#'; break;
+                case '/': c = '\\'; break;
+                case '\'': c = '^'; break;
+                case '(': c = '['; break;
+                case ')': c = ']'; break;
+                case '!': c = '|'; break;
+                case '<': c = '{'; break;
+                case '>': c = '}'; break;
+                case '-': c = '~'; break;
+
+                default:
+                    // not a trigraph sequence, won't convert
+                    c = from_next [0];   // i.e., '?'
+                }
+
+                // skip the leading "??" of a trigraph sequence
+                if (c != from_next [0])
+                    from_next += 2;
+            }
+            else
+                // ordinary (not escaped) character
+                c = ch;
+        }
+        else
+            // ordinary (not escaped) character
+            c = ch;
+
+        // to_next may be 0 (when called from do_length())
+        // doing pointer math on invalid pointers (null) has undefined behavior
+        // but will probably work in most cases
+        if (to_next)
+            *to_next = c;
+
+        ++to_next;
+
+        // in case one of the inner loops has reached end
+        if (from_next == from_end)
+            break;
+    }
+
+    rw_fatal (from_next >= from && from_next <= from_end, 0, __LINE__,
+              "user-defined codecvt: internal inconsistency");
+
+    rw_fatal (to_next >= to && (to_next <= to_end || !to_end), 0, __LINE__,
+              "user-defined codecvt: internal inconsistency");
+
+    return res;
+}
+
+
+// Returns the number of external chars in [from, from_end) that convert
+// to `max' or fewer complete internal chars, i.e., (from_next - from)
+// after a conversion limited to `max' internal chars (22.2.1.5.2).
+int
+cformat::do_length (state_type&,
+                    const extern_type *from,
+                    const extern_type *from_end,
+                    std::size_t max) const
+{
+    // initialized since do_in() passes the value to its diagnostics
+    const extern_type *from_next = from;
+    intern_type *to_next = 0;
+
+    std::mbstate_t st;
+    std::memset (&st, 0, sizeof st);
+
+    // use do_in() with `to' of 0 to do the computation
+    // doing pointer math on invalid pointers (null) has undefined behavior
+    // but will probably work in most cases
+    do_in (st, from, from_end, from_next,
+           to_next, to_next + max, to_next);
+
+    // the result is the length of the consumed external sequence,
+    // not the number of internal chars it represents
+    return int (from_next - from);
+}
+
+/***********************************************************************/
+
+// determines the size in bytes of the file named fname; returns -1
+// when the file cannot be opened or its size cannot be determined
+static std::streamsize
+fsize (const char *fname)
+{
+    std::FILE* const f = std::fopen (fname, "r");
+    if (!f)
+        return -1;
+
+    // ftell() reports failure as -1, the same value used for a failed seek
+    const std::streamsize size =
+        std::fseek (f, 0, SEEK_END) ? -1L : std::ftell (f);
+
+    // close the file on every path, including after a failed fseek()
+    std::fclose (f);
+
+    return size;
+}
+
+/***********************************************************************/
+/* Sanity check of the cformat facet on its own, without any streams:
+ * converts the chars '\1' through '\377' followed by a NUL to their
+ * external representation with out(), converts the result back with
+ * in() a few external chars at a time, and asserts that the round
+ * trip reproduces the original array.
+ */
+static void
+self_test ()
+{
+ rw_info (0, __FILE__, __LINE__,
+ "user-defined codecvt facet -- self test");
+
+ static const char* const result[] = { // indexed by codecvt_base::result
+ "ok", "partial", "error", "noconv"
+ };
+
+ // user-defined code conversion facet
+ cformat fmt (1);
+
+ // original array of internal characters and one to which to convert
+ // an external representation back to (for comparison)
+ cformat::intern_type intrn [2][256] = { { '\0' } };
+
+ // array of external chars large enough to hold the internal array
+ // each internal char converts to at most 4 external chars
+ cformat::extern_type extrn [1024] = { '\0' };
+
+ // fill internal array with chars from '\1' to '\377'
+ for (std::size_t i = 0; i != sizeof intrn [0] - 1; ++i)
+ intrn [0][i] = cformat::intern_type (i + 1);
+
+ const cformat::intern_type *intrn_next_0 = 0;
+ cformat::intern_type *intrn_next_1 = 0;
+
+ cformat::extern_type *extrn_next = 0;
+
+
+ // dummy (state not used, conversion is stateless)
+ std::mbstate_t st;
+ std::memset (&st, 0, sizeof st);
+
+ // convert internal to external representation, substituting
+ // escape sequences for non-printable characters
+ std::codecvt_base::result res;
+
+ // convert array in internal representation to external representation
+ res = fmt.out (st,
+ intrn [0], intrn [0] + sizeof intrn [0], intrn_next_0,
+ extrn, extrn + sizeof extrn, extrn_next);
+
+ rw_assert (std::codecvt_base::ok == res, 0, __LINE__,
+ "codecvt::out (); result == codecvt_base::ok, "
+ "got codecvt_base::%s", result [res]);
+
+ // assert that the external sequence is longer than the internal one
+ rw_assert (extrn_next - extrn > intrn_next_0 - intrn [0], 0, __LINE__,
+ "codecvt::out (); converted size %d, expected > %d",
+ extrn_next - extrn, intrn_next_0 - intrn [0]);
+
+ // convert external to internal representation, parsing
+ // multi-char escape sequences into single chars
+
+ const cformat::intern_type *next = extrn; // NOTE(review): points at extern chars (both types are char)
+
+ intrn_next_1 = intrn [1];
+
+ for (; next != extrn_next; ) {
+
+ // allow only a small buffer space to exercise partial conversion
+ std::size_t step = std::size_t (extrn_next - next);
+ if (step > 12)
+ step = 5 + step % 8; // i.e., between 5 and 12 external chars
+
+ res = fmt.in (st,
+ next, next + step, next, // next is both from and from_next
+ intrn_next_1, intrn [1] + sizeof intrn [1], intrn_next_1);
+
+ if (std::codecvt_base::error == res)
+ break;
+ }
+
+ // assert that entire sequence converted ok
+ rw_assert (std::codecvt_base::ok == res, 0, __LINE__,
+ "codecvt::in (); result == codecvt_base::ok, got "
+ "codecvt_base::%s at offset %d", result [res], next - extrn);
+
+ rw_assert (intrn_next_1 == intrn [1] + sizeof intrn [1], 0, __LINE__,
+ "codecvt::in (); to_next == %#p, got %#p",
+ intrn [1] + sizeof intrn [1], intrn_next_1);
+
+ rw_assert (0 == std::strcmp (intrn [0], intrn [1]), 0, __LINE__,
+ "codecvt<>::out/in ()");
+}
+
+/***********************************************************************/
+/* Asserts that the state of the stream strm is exactly state; being
+ * a macro, __LINE__ expands to the line of the invocation.
+ */
+#define RW_ASSERT_STATE(strm, state) \
+ rw_assert ((strm).rdstate () == (state), 0, __LINE__, \
+ "rdstate () == %{Is}, got %{Is}", \
+ (state), (strm).rdstate ())
+
+// Exercises ifstream extraction from the file named fname without
+// a converting codecvt facet: checks showmanyc() and in_avail()
+// against the file size, read() and sungetc() within the putback
+// area, and finally that the file read as is contains nothing but
+// printable ASCII characters (i.e., that everything else had been
+// escaped when the file was written).
+static void
+test_noconv (const char *fname)
+{
+    rw_info (0, 0, __LINE__, "ifstream extraction without conversion");
+
+    std::ifstream f (fname);
+
+    // make sure file stream has been successfully opened
+    RW_ASSERT_STATE (f, std::ios::goodbit);
+
+    // gain public access to protected members
+    struct pubbuf: std::streambuf {
+
+        // working around an MSVC 6.0 bug (PR #26330)
+        typedef std::streambuf Base;
+
+        virtual std::streamsize showmanyc () {
+            return Base::showmanyc ();
+        }
+        char* pubgptr () {
+            return Base::gptr ();
+        }
+        char* pubegptr () {
+            return Base::egptr ();
+        }
+    };
+
+    // use static cast through void* to avoid using reinterpret_cast
+    pubbuf *rdbuf = _RWSTD_STATIC_CAST (pubbuf*, (void*)f.rdbuf ());
+
+    std::streamsize filesize = rdbuf->showmanyc ();
+
+    // exercise 27.5.2.4.3, p1: showmanyc() returns the "estimated"
+    // size of the sequence (i.e., the file size in this case)
+    rw_assert (filesize == fsize (fname), 0, __LINE__,
+               "streambuf::showmanyc () == %ld, got %ld",
+               fsize (fname), filesize);
+
+    // exercise 27.5.2.2.3, p1
+    filesize = f.rdbuf ()->in_avail ();
+
+    // in_avail() returns egptr() - gptr() when the get area is not
+    // empty and showmanyc() otherwise
+    const char *gptr  = rdbuf->pubgptr ();
+    const char *egptr = rdbuf->pubegptr ();
+
+    rw_assert (filesize == (gptr < egptr ? egptr - gptr : fsize (fname)),
+               0, __LINE__,
+               "streambuf::in_avail () == %ld, got %ld",
+               (gptr < egptr ? egptr - gptr : fsize (fname)), filesize);
+
+    // allocate buffer large enough to accommodate the converted
+    // (i.e. internal) sequence
+    const std::size_t bufsize = 0x10000;   // 64k should do it
+    char *tmpbuf = new char [bufsize];
+
+    // fill with non-0 value to check for writes past the end
+    // (see also Onyx incident 14033)
+    std::memset (tmpbuf, '\x7f', bufsize);
+
+    // exercise putback area
+    std::streamsize i;
+
+#ifndef _RWSTD_PBACK_SIZE
+#  define _RWSTD_PBACK_SIZE 1
+#endif   // _RWSTD_PBACK_SIZE
+
+    // _RWSTD_PBACK_SIZE is the size of the putback area the library
+    // was configured with; the macro expands to streamsize (N)
+    for (i = 0; i != _RWSTD_PBACK_SIZE + 1; ++i) {
+        // read a few characters, read must not append a '\0'
+        std::streamsize n = f.read (tmpbuf, i).gcount ();
+
+        // assert that read exactly `i' chars, buffer not null-terminated
+        rw_assert (i == n && '\x7f' == tmpbuf [i], 0, __LINE__,
+                   "ifstream::read (%#p, %ld) read %ld,"
+                   "buffer terminated with '\\%03o'",
+                   tmpbuf, i, n, tmpbuf [i]);
+
+        // put back read characters; assert that they are the same
+        // as those in the corresponding positions in the buffer
+        for (std::streamsize j = 0; j != i; ++j) {
+            std::ifstream::int_type c = f.rdbuf ()->sungetc ();
+
+            typedef std::ifstream::traits_type Traits;
+
+            rw_assert (Traits::to_int_type (tmpbuf [n - j - 1]) == c,
+                       0, __LINE__,
+                       "filebuf::sungetc() == '\\%03o', got '\\%03o",
+                       tmpbuf [n - j - 1], c);
+        }
+
+        // re-read characters just put back
+        char buf [_RWSTD_PBACK_SIZE + 1];
+        std::memset (buf, '\x7f', sizeof buf);
+
+        std::streamsize n2 = f.read (buf, i).gcount ();
+
+        // assert that the requested number of chars were read in
+        rw_assert (i == n2 && '\x7f' == buf [i], 0, __LINE__,
+                   "ifstream::read (%#p, %ld) read %ld,"
+                   "buffer terminated with '\\%03o'",
+                   buf, i, n2, buf [i]);
+
+        // assert that the read chars are those that were put back
+        for (std::streamsize k = 0; k != i; ++k) {
+            rw_assert (buf [k] == tmpbuf [k], 0, __LINE__,
+                       "buffer mismatch at offset %ld: got '\\%03o', "
+                       "expected '\\%03o'", k, buf [k], tmpbuf [k]);
+            // put character back again so that it can be read back in
+            f.rdbuf ()->sungetc ();
+        }
+    }
+
+    // read file contents into buffer (apply no conversion); leave
+    // room for one char past the end so that tmpbuf [n] is always
+    // within the bounds of the buffer
+    const std::streamsize n = f.read (tmpbuf, bufsize - 1).gcount ();
+
+    // 27.6.1.3, p28 - read() sets eofbit | failbit
+    // if end-of-file occurs on the input sequence
+    RW_ASSERT_STATE (f, std::ios::eofbit | std::ios::failbit);
+
+    // assert that the entire file has been read
+    rw_assert (n == filesize && '\x7f' == tmpbuf [n], 0, __LINE__,
+               "ifstream::read (%#p, %ld) read %ld, expected %ld; "
+               "buffer terminated with '\\%03o' [%s]",
+               tmpbuf, long (bufsize - 1), n, filesize, tmpbuf [n], fname);
+
+    tmpbuf [n] = '\0';
+
+    // assert that file contains no control or non-ASCII characters;
+    // start at 1 since strchr() always finds the terminating NUL,
+    // and skip the printable range [' ', '~']
+    const char *bad = 0;
+    for (i = 1; !bad && i != UCHAR_MAX + 1; ++i) {
+        if (i >= ' ' && i <= '~')
+            continue;
+        bad = std::strchr (tmpbuf, char (i));
+    }
+
+    rw_assert (0 == bad, 0, __LINE__,
+               "unescaped non-printable character '\\%03o' at offset %ld",
+               bad ? unsigned ((unsigned char)*bad) : 0U,
+               bad ? long (bad - tmpbuf) : 0L);
+
+    delete[] tmpbuf;
+}
+
+/***********************************************************************/
+/* Writes a file containing an invalid hex escape sequence to fname,
+ * then reads it back through the cformat facet and verifies that only
+ * the characters preceding the error are extracted and that the stream
+ * ends up with eofbit | failbit set, both on the first and on a second
+ * attempt to read.
+ */
+static void
+test_error (const char *fname)
+{
+ rw_info (0, 0, __LINE__, "ifstream extraction with a conversion error");
+
+ const char outbuf[] = {
+ "abcdefghijklmnopqrstuvwxyz\\x20\\xzzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ // ^
+ // |
+ // error (invalid hex sequence) ---+
+ };
+
+ // write out a text file containing a conversion error
+ std::ofstream ostrm (fname);
+
+ ostrm << outbuf;
+
+ ostrm.close ();
+
+ // read the file back in using the conversion facet
+ std::ifstream istrm (fname);
+
+ // user-defined code conversion facet
+ const cformat fmt (1 /* prevent locale from deleting */);
+
+ // create a locale by combining the classic locale and our UD facet
+ // cformat; the facet will replace std::codecvt<char, char, mbstate_t>
+ std::locale l (std::locale::classic (), &fmt);
+
+ // imbue locale with formatting facet into streams and save previous
+ l = istrm.imbue (l);
+
+ char inbuf [sizeof outbuf * 4] = { 0 };
+
+ // try to read partial contents of the file
+ // including the conversion error into the buffer
+ istrm.read (inbuf, 26L + 4L /* "a..z" <space> <error> "AB" */);
+
+ // verify that the operation failed, eofbit is set since
+ // less than the requested number of characters have been read
+ RW_ASSERT_STATE (istrm, std::ios::eofbit | std::ios::failbit);
+
+ // verify that the 26 characters 'a' through 'z' plus
+ // the space (i.e., 27 chars) have been extracted
+ rw_assert (27 == istrm.gcount (), 0, __LINE__,
+ "ifstream::read () extracted %d, expected 27", // NOTE(review): %d with a streamsize argument, other calls use %ld - confirm
+ istrm.gcount ());
+
+ rw_assert ( 0 == std::ifstream::traits_type::compare (inbuf, outbuf, 26)
+ && ' ' == inbuf [26], 0, __LINE__,
+ "ifstream::read () got \"%s\", expected \"%.26s \"",
+ inbuf, outbuf);
+
+ istrm.clear ();
+
+ // try to read again, and verify that the operation fails
+ istrm.read (inbuf, sizeof inbuf);
+
+ RW_ASSERT_STATE (istrm, std::ios::eofbit | std::ios::failbit);
+
+ rw_assert (0 == istrm.gcount (), 0, __LINE__,
+ "ifstream::read () extracted %d, expected 0",
+ istrm.gcount ());
+
+ // replace the imbued locale (the one referencing fmt) with the original
+ istrm.imbue (l);
+}
+
+/***********************************************************************/
+/* Exercises ignore(), seekg(), tellg(), pubsync(), and in_avail() on
+ * an ifstream reading the file named fname through the cformat facet.
+ * When bufsize is not size_t (-1) the stream buffer is first asked,
+ * via pubsetbuf (0, bufsize), to use a buffer of that many bytes.
+ * The file is expected to consist of <offset, char> pairs where each
+ * offset is the file position of its own first digit.
+ */
+static void
+test_seek (const char *fname,
+ std::size_t bufsize = std::size_t (-1))
+{
+ std::ifstream f (fname);
+
+ // make sure stream has been successfully opened
+ RW_ASSERT_STATE (f, std::ios::goodbit);
+
+ // set buffer size if specified
+ if (std::size_t (-1) != bufsize) {
+
+ rw_info (0, 0, __LINE__,
+ "ifstream::seekg()/tellg() - %zu byte buffer", bufsize);
+
+ RW_ASSERT_STATE (f, std::ios::goodbit);
+ f.rdbuf ()->pubsetbuf (0, bufsize);
+ }
+ else {
+ rw_info (0, 0, __LINE__,
+ "ifstream::seekg()/tellg() - default buffer size");
+ }
+
+ // user-defined code conversion facet
+ const cformat fmt (1 /* prevent locale from deleting */);
+
+ // create a locale by combining the classic locale and our UD facet
+ // cformat; the facet will replace std::codecvt<char, char, mbstate_t>
+ std::locale l (std::locale::classic (), &fmt);
+
+ // imbue locale with formatting facet into streams and save previous
+ l = f.imbue (l);
+
+ // seek to the beginning of stream (safe)
+ f.seekg (0);
+ RW_ASSERT_STATE (f, std::ios::goodbit);
+
+ const unsigned char max = UCHAR_MAX - '~'; // number of chars above '~'
+
+ for (std::size_t n = 0; n != std::size_t (max - 1); ++n) {
+
+ const char delim = char ('~' + n);
+
+ // skip over chars until the terminating delim (and extract it)
+ f.ignore (0x10000, std::fstream::traits_type::to_int_type (delim));
+ rw_assert (f.good (), 0, __LINE__,
+ "istream::ignore (0x10000, '\\%03o'); "
+ "rdstate() = %{Is}, gcount() = %ld",
+ delim, f.rdstate (), f.gcount ());
+
+ // alternate between exercising seekg() and pubsync()
+ if (n % 2) {
+ // seek inplace (offset must be 0 for MB encodings)
+ f.seekg (0, std::ios::cur);
+ RW_ASSERT_STATE (f, std::ios::goodbit);
+ }
+ else {
+ // filebuf::pubsync() must return 0
+ int syn = f.rdbuf ()->pubsync ();
+ rw_assert (0 == syn, 0, __LINE__,
+ "filebuf::pubsync () == 0, got %d", syn);
+ }
+
+ // skip exactly one char forward (retrieve a space)
+ char c = char ();
+ f.get (c);
+ RW_ASSERT_STATE (f, std::ios::goodbit);
+ rw_assert (' ' == c, 0, __LINE__,
+ "istream::get(char_type) got '\\%03o', expected ' '", c);
+
+ // get current file position
+ const std::ifstream::pos_type pos = f.tellg ();
+ RW_ASSERT_STATE (f, std::ios::goodbit);
+
+ // extract offset - should be the same as pos
+ long offset = 0;
+ f >> offset;
+
+ RW_ASSERT_STATE (f, std::ios::goodbit);
+
+ rw_assert (long (pos) == offset, 0, __LINE__,
+ "ifstream::operator>>() expected %ld, got %ld",
+ long (pos), offset);
+
+ // in_avail() must return a value > 0
+ std::streamsize avail = f.rdbuf ()->in_avail ();
+ rw_assert (avail > 0, 0, __LINE__,
+ "filebuf::in_avail() expected > 0, got %ld", avail);
+
+ // "rewind" stream to the beginning
+ f.seekg (0);
+ RW_ASSERT_STATE (f, std::ios::goodbit);
+
+ // try seeking to the previous position
+ f.seekg (pos);
+ RW_ASSERT_STATE (f, std::ios::goodbit);
+ rw_assert (f.tellg () == pos, 0, __LINE__,
+ "istream::seekg (%ld); tellg () returns %ld",
+ long (pos), long (f.tellg ()));
+
+ // re-read offset - should be the same as file pos
+ f >> offset;
+
+ RW_ASSERT_STATE (f, std::ios::goodbit);
+
+ rw_assert (long (pos) == offset, 0, __LINE__,
+ "ifstream::operator>>() expected %ld, got %ld",
+ long (pos), offset);
+ }
+
+ // ignore the rest of file, eofbit must be set
+ f.ignore (0x10000);
+ RW_ASSERT_STATE (f, std::ios::eofbit);
+
+ // in_avail() must return 0
+ const std::streamsize avail = f.rdbuf ()->in_avail ();
+ rw_assert (0 == avail, 0, __LINE__,
+ "filebuf::in_avail() expected 0, got %ld", avail);
+
+ // imbue original locale (currently imbued locale
+ // will be destroyed prior to the destruction of `f')
+ f.imbue (l);
+}
+
+/***********************************************************************/
+/* Test function passed to the test driver from main(): runs the facet
+ * self test, writes a temporary file of <offset, char> pairs through
+ * an ofstream imbued with the cformat facet, reads the file back both
+ * without and with conversion, and exercises seeking (with a range of
+ * buffer sizes) and conversion errors on it.  Returns 0, or 1 when no
+ * temporary file name could be obtained.
+ */
+static int
+run_test (int, char*[])
+{
+ // self-test to make sure the facet works
+ self_test ();
+
+ // user-defined code conversion facet
+ const cformat fmt (1 /* prevent locale from deleting */);
+
+ // create a locale by combining the classic locale and our UD facet
+ // cformat; the facet will replace std::codecvt<char, char, mbstate_t>
+ std::locale l (std::locale::classic (), &fmt);
+
+ const char *fname = rw_tmpnam (0);
+
+ if (!fname)
+ return 1;
+
+ // will be populated with file offsets and escape sequences
+ char buffer [4096] = { '\0' };
+
+ int buflen = 0;
+
+ // generate file contents using UD conversion
+ if (1) {
+ rw_info (0, 0, __LINE__,
+ "ofstream insertion with multibyte conversion");
+
+ std::ofstream f (fname);
+
+ // make sure file stream has been successfully opened
+ RW_ASSERT_STATE (f, std::ios::goodbit);
+
+ // imbue locale with formatting facet into stream
+ f.imbue (l);
+
+ for (std::size_t i = 1; i != UCHAR_MAX + 1U; ++i) {
+
+ const std::ofstream::pos_type pos = f.tellp ();
+
+ RW_ASSERT_STATE (f, std::ios::goodbit);
+
+ buflen = std::strlen (buffer);
+
+ // append the file offset followed by a (possibly escaped) char
+ std::sprintf (buffer + buflen, "%ld %c ", long (pos), char (i));
+
+ // write out the just appended portion of the buffer
+ f << (buffer + buflen);
+
+ RW_ASSERT_STATE (f, std::ios::goodbit);
+ }
+
+ buflen = std::strlen (buffer);
+
+ // file contains the contents of buffer with non-printable
+ // chars replaced with escape sequences (e.g., tabs with '\t', etc.)
+ }
+
+
+ // read contents of file w/o conversion
+ test_noconv (fname);
+
+ // read contents of file, apply conversion
+ if (1) {
+ rw_info (0, 0, __LINE__,
+ "ifstream extraction with multibyte conversion");
+
+ std::ifstream f (fname);
+
+ // make sure file stream has been successfully opened
+ RW_ASSERT_STATE (f, std::ios::goodbit);
+
+ // imbue locale with formatting facet into stream
+ f.imbue (l);
+
+ // allocate buffer large enough to accommodate the converted
+ // (i.e. internal) sequence
+ char tmpbuf [sizeof buffer];
+
+ // read file contents into buffer, convert escape sequences
+ // into the corresponding (perhaps unprintable) characters
+ const std::streamsize n = f.read (tmpbuf, sizeof tmpbuf).gcount ();
+
+ // 27.6.1.3, p28 - read() sets eofbit | failbit
+ // if end-of-file occurs on the input sequence
+ RW_ASSERT_STATE (f, std::ios::eofbit | std::ios::failbit);
+
+ rw_assert (long (n) == buflen, 0, __LINE__,
+ "ifstream::read (%#p, %d); read %ld, expected %d",
+ tmpbuf, sizeof tmpbuf, long (n), buflen);
+
+ // assert that converted file contents are the same
+ // as the originally generated buffer
+ const long len = long (n) < buflen ? long (n) : buflen;
+ for (long i = 0; i != len; ++i) {
+ if (tmpbuf [i] != buffer [i]) {
+ rw_assert (0, 0, __LINE__,
+ "'\\%03o' == '\\%03o'; offset %d",
+ (unsigned char)buffer [i],
+ (unsigned char)tmpbuf [i], i);
+ break; // report only the first mismatch
+ }
+ }
+ }
+
+
+ // test with default buffer
+ test_seek (fname);
+
+ // retest with buffer of user-defined size, shrinking from 4096 down
+ // to and including 0 in progressively smaller decrements
+ for (std::size_t n = 4096; n != std::size_t (-1);
+ n -= 1024 < n ? 1024 : 256 < n ? 256 : 16 < n ? 16 : 1)
+ test_seek (fname, n);
+
+
+ // test with errors during conversion
+ test_error (fname);
+
+ // remove a temporary file
+ std::remove (fname);
+
+ return 0;
+}
+
+/**************************************************************************/
+
+// Program entry point: hands the command line and run_test() over to
+// the test driver for clause lib.filebuf and returns the driver's result.
+int main (int argc, char *argv[])
+{
+    const int status =
+        rw_test (argc, argv, __FILE__,
+                 "lib.filebuf",
+                 0 /* no comment */,
+                 run_test,
+                 "", 0);
+
+    return status;
+}
Propchange: incubator/stdcxx/trunk/tests/iostream/27.filebuf.codecvt.cpp
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/stdcxx/trunk/tests/iostream/27.filebuf.codecvt.cpp
------------------------------------------------------------------------------
svn:keywords = Id