You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stdcxx.apache.org by se...@apache.org on 2007/05/05 23:46:43 UTC
svn commit: r535564 - in /incubator/stdcxx/trunk/examples/manual:
mbsrtowcs.cpp out/mbsrtowcs.out
Author: sebor
Date: Sat May 5 14:46:42 2007
New Revision: 535564
URL: http://svn.apache.org/viewvc?view=rev&rev=535564
Log:
2007-05-05 Martin Sebor <se...@roguewave.com>
* mbsrtowcs.cpp: New example program to demonstrate an implementation
of the C Standard Library function mbsrtowcs() in terms of the C++
Standard Library codecvt facet precipitated by the Usenet thread:
http://groups.google.com/group/comp.lang.c++/browse_thread/thread/85ffe366e1d65734
* mbsrtowcs.out: Expected output of the example program.
Added:
incubator/stdcxx/trunk/examples/manual/mbsrtowcs.cpp (with props)
incubator/stdcxx/trunk/examples/manual/out/mbsrtowcs.out
Added: incubator/stdcxx/trunk/examples/manual/mbsrtowcs.cpp
URL: http://svn.apache.org/viewvc/incubator/stdcxx/trunk/examples/manual/mbsrtowcs.cpp?view=auto&rev=535564
==============================================================================
--- incubator/stdcxx/trunk/examples/manual/mbsrtowcs.cpp (added)
+++ incubator/stdcxx/trunk/examples/manual/mbsrtowcs.cpp Sat May 5 14:46:42 2007
@@ -0,0 +1,220 @@
+/**************************************************************************
+ *
+ * mbsrtowcs.cpp
+ *
+ * Example program to demonstrate an implementation of the C Standard
+ * Library function mbsrtowcs() in terms of the C++ Standard Library
+ * codecvt facet.
+ *
+ * $Id$
+ *
+ ***************************************************************************
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ **************************************************************************/
+
+#include <cassert> // for assert()
+#include <cerrno> // for EILSEQ, errno
+#include <cstring> // for strlen()
+#include <cwchar> // for mbstate_t
+#include <ios> // for hex
+#include <iostream> // for cout
+#include <locale> // for codecvt, locale
+
+#include <examples.h>
+
+
+// my_mbsrtowcs() converts the NUL-terminated multibyte string src to wide characters; returns the number converted or size_t (-1) on error
+std::size_t
+my_mbsrtowcs (std::mbstate_t *pstate,
+ wchar_t *dst,
+ const char *src,
+ std::size_t size)
+{
+ const std::locale global;
+
+ typedef std::codecvt<wchar_t, char, std::mbstate_t> CodeCvt;
+
+ // retrieve the codecvt facet from the global locale
+ const CodeCvt &cvt = std::use_facet<CodeCvt>(global);
+
+ // when dst is null only count: convert into a small scratch buffer and ignore size
+ wchar_t buf [4];
+ if (0 == dst) {
+ dst = buf;
+ size = sizeof buf / sizeof *buf;
+ }
+
+ // set up pointers into the source sequence (the terminating NUL is excluded)
+ const char* from = src;
+ const char* const from_end = from + std::strlen (from);
+ const char* from_next = from;
+
+ // set up pointers into the destination sequence
+ wchar_t* to = dst;
+ wchar_t* const to_end = to + size;
+ wchar_t* to_next;
+
+ // number of wide characters converted so far (no terminating NUL is stored or counted)
+ std::size_t nconv = 0;
+
+ // use a local state when pstate is null (i.e., emulate mbstowcs)
+ std::mbstate_t state = std::mbstate_t ();
+ if (0 == pstate)
+ pstate = &state;
+
+ for ( ; from_next != from_end && to != to_end;
+ from = from_next, to = dst == buf ? dst : to_next) {
+
+ // convert a (sub)sequence of the source buffer into
+ // the destination buffer (the scratch buffer is refilled from its start)
+ const std::codecvt_base::result res =
+ cvt.in (*pstate,
+ from, from_end, from_next,
+ to, to_end, to_next);
+
+ // verify the consistency of the xxx_next pointers
+ assert (from <= from_next && from_next <= from_end);
+ assert (to <= to_next && to_next <= to_end);
+
+ // process conversion result
+ switch (res) {
+
+ case std::codecvt_base::error:
+ // conversion error: report an illegal byte sequence
+ errno = EILSEQ;
+ return std::size_t (-1);
+
+ case std::codecvt_base::noconv:
+ // only codecvt<T, T> (i.e., facets where intern_type and
+ // extern_type are identical) is allowed to return noconv
+ // treat this case as an error even though it indicates
+ // a bad (incorrectly implemented) codecvt facet; errno is not set
+ return std::size_t (-1);
+
+ case std::codecvt_base::partial:
+ // partial conversion (incomplete character or not enough room
+ // in destination buffer): tolerated only when counting and the
+ // scratch buffer was filled completely, otherwise an error
+ if (dst != buf || std::size_t (to_next - to) < size) {
+ errno = EILSEQ;
+ return std::size_t (-1);
+ }
+
+ nconv += to_next - to;
+ break;
+
+ case std::codecvt_base::ok:
+ // complete conversion of an initial subsequence (but not
+ // necessarily all) of the source buffer
+ nconv += to_next - to;
+
+ if (dst == buf && from_next == from_end)
+ return nconv;
+
+ break;
+ }
+ }
+
+ return nconv;
+}
+
+
+int main ()
+{
+    // Convert each sample UTF-8 string with my_mbsrtowcs() and print the
+    // resulting code points next to the source bytes; returns 0.
+    static const char* const mbs [] = {
+        "a", "abc",
+        // <U0391>: Greek letter Alpha
+        "\xce\x91",
+        // <U0391><U0391><U0392>: Greek letters Alpha Alpha Beta
+        "\xce\x91\xce\x91\xce\x92",
+        // <U0391><U0392><U0393>: Greek letters Alpha Beta Gamma
+        "\xce\x91\xce\x92\xce\x93",
+        // <U0966>: Devanagari digit 0
+        "\xe0\xa5\xa6",
+        // <U0967><U0966>: Devanagari digits 10
+        "\xe0\xa5\xa7\xe0\xa5\xa6",
+        // <U0968><U0967><U0966>: Devanagari digits 210
+        "\xe0\xa5\xa8\xe0\xa5\xa7\xe0\xa5\xa6"
+    };
+
+    typedef std::codecvt_byname<wchar_t, char, std::mbstate_t> CodeCvt;
+
+    // create a UCS/UTF-8 codecvt facet and install it in a locale
+    const std::locale utf (std::locale (""), new CodeCvt ("UTF-8@UCS"));
+
+    // set the global locale to use the UCS/UTF-8 codecvt facet
+    std::locale::global (utf);
+
+    // iterate over examples of UTF-8 sequences and output the wide
+    // character sequence each converts to
+    for (std::size_t i = 0; i != sizeof mbs / sizeof *mbs; ++i) {
+
+        // initialize state to the initial shift state
+        std::mbstate_t state = std::mbstate_t ();
+
+        // obtain the length of the wide character sequence
+        // corresponding to the multibyte source sequence,
+        // not including the terminating NUL
+        const std::size_t length =
+            my_mbsrtowcs (&state, 0, mbs [i], std::size_t (-1));
+
+        if (std::size_t (-1) == length) {
+            std::cerr << "Error computing length of destination sequence.\n";
+            continue;
+        }
+
+        // allocate a zero-filled wide character buffer large enough to
+        // hold the converted sequence plus a terminating NUL; the zero
+        // fill supplies the NUL since my_mbsrtowcs() does not store one
+        wchar_t* const dst = new wchar_t [length + 1] ();
+
+        // reset state to the initial shift state
+        state = std::mbstate_t ();
+
+        // convert the narrow character source sequence into
+        // the wide character buffer
+        const std::size_t nconv =
+            my_mbsrtowcs (&state, dst, mbs [i], length + 1);
+
+        if (length != nconv) {
+            std::cerr << "Error converting source sequence.\n";
+            delete[] dst;
+            continue;
+        }
+
+        // write out the wide and the narrow sequences
+        std::cout << "UCS-2 (" << std::dec << length << "): " << std::hex;
+
+        for (const wchar_t *pwc = dst; *pwc != L'\0'; ++pwc)
+            std::cout << "U+" << unsigned (*pwc) << ' ';
+
+        std::cout << " ==> UTF-8: ";
+
+        for (const char *pc = mbs [i]; *pc; ++pc)
+            std::cout << "\\x" << int (static_cast<unsigned char>(*pc));
+
+        std::cout << "\"\n";
+
+        delete[] dst;
+    }
+
+    return 0;
+}
Propchange: incubator/stdcxx/trunk/examples/manual/mbsrtowcs.cpp
------------------------------------------------------------------------------
svn:keywords = Id
Added: incubator/stdcxx/trunk/examples/manual/out/mbsrtowcs.out
URL: http://svn.apache.org/viewvc/incubator/stdcxx/trunk/examples/manual/out/mbsrtowcs.out?view=auto&rev=535564
==============================================================================
--- incubator/stdcxx/trunk/examples/manual/out/mbsrtowcs.out (added)
+++ incubator/stdcxx/trunk/examples/manual/out/mbsrtowcs.out Sat May 5 14:46:42 2007
@@ -0,0 +1,8 @@
+UCS-2 (1): U+61 ==> UTF-8: \x61"
+UCS-2 (3): U+61 U+62 U+63 ==> UTF-8: \x61\x62\x63"
+UCS-2 (1): U+391 ==> UTF-8: \xce\x91"
+UCS-2 (3): U+391 U+391 U+392 ==> UTF-8: \xce\x91\xce\x91\xce\x92"
+UCS-2 (3): U+391 U+392 U+393 ==> UTF-8: \xce\x91\xce\x92\xce\x93"
+UCS-2 (1): U+966 ==> UTF-8: \xe0\xa5\xa6"
+UCS-2 (2): U+967 U+966 ==> UTF-8: \xe0\xa5\xa7\xe0\xa5\xa6"
+UCS-2 (3): U+968 U+967 U+966 ==> UTF-8: \xe0\xa5\xa8\xe0\xa5\xa7\xe0\xa5\xa6"