You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stdcxx.apache.org by se...@apache.org on 2007/05/05 23:46:43 UTC

svn commit: r535564 - in /incubator/stdcxx/trunk/examples/manual: mbsrtowcs.cpp out/mbsrtowcs.out

Author: sebor
Date: Sat May  5 14:46:42 2007
New Revision: 535564

URL: http://svn.apache.org/viewvc?view=rev&rev=535564
Log:
2007-05-05  Martin Sebor  <se...@roguewave.com>

	* mbsrtowcs.cpp: New example program to demonstrate an implementation
	of the C Standard Library function mbsrtowcs() in terms of the C++
	Standard Library codecvt facet precipitated by the Usenet thread:
	http://groups.google.com/group/comp.lang.c++/browse_thread/thread/85ffe366e1d65734
	* mbsrtowcs.out: Expected output of the example program.

Added:
    incubator/stdcxx/trunk/examples/manual/mbsrtowcs.cpp   (with props)
    incubator/stdcxx/trunk/examples/manual/out/mbsrtowcs.out

Added: incubator/stdcxx/trunk/examples/manual/mbsrtowcs.cpp
URL: http://svn.apache.org/viewvc/incubator/stdcxx/trunk/examples/manual/mbsrtowcs.cpp?view=auto&rev=535564
==============================================================================
--- incubator/stdcxx/trunk/examples/manual/mbsrtowcs.cpp (added)
+++ incubator/stdcxx/trunk/examples/manual/mbsrtowcs.cpp Sat May  5 14:46:42 2007
@@ -0,0 +1,220 @@
+/**************************************************************************
+ *
+ * mbsrtowcs.cpp
+ *
+ * Example program to demonstrate an implementation of the C Standard
+ * Library function mbsrtowcs() in terms of the C++ Standard Library
+ * codecvt facet.
+ *
+ * $Id$
+ *
+ ***************************************************************************
+ *
+ * Licensed to the Apache Software  Foundation (ASF) under one or more
+ * contributor  license agreements.  See  the NOTICE  file distributed
+ * with  this  work  for  additional information  regarding  copyright
+ * ownership.   The ASF  licenses this  file to  you under  the Apache
+ * License, Version  2.0 (the  "License"); you may  not use  this file
+ * except in  compliance with the License.   You may obtain  a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the  License is distributed on an  "AS IS" BASIS,
+ * WITHOUT  WARRANTIES OR CONDITIONS  OF ANY  KIND, either  express or
+ * implied.   See  the License  for  the  specific language  governing
+ * permissions and limitations under the License.
+ * 
+ **************************************************************************/
+ 
+#include <cassert>    // for assert()
+#include <cerrno>     // for EILSEQ, errno
+#include <cstring>    // for strlen()
+#include <cwchar>     // for mbstate_t
+#include <ios>        // for hex
+#include <iostream>   // for cout
+#include <locale>     // for codecvt, locale
+
+#include <examples.h>
+
+
+// my_mbsrtowcs() converts the NUL-terminated multibyte string src to wide
+// characters using the codecvt facet of the global locale and returns the
+// number of wide characters converted (not counting the terminating NUL)
+// or size_t(-1) on error; when dst is null size is ignored and only the
+// length is computed, otherwise at most size characters are stored in dst
+// followed, if there is room, by a terminating NUL
+std::size_t
+my_mbsrtowcs (std::mbstate_t *pstate,
+              wchar_t        *dst,
+              const char     *src,
+              std::size_t     size)
+{
+    typedef std::codecvt<wchar_t, char, std::mbstate_t> CodeCvt;
+    // retrieve the codecvt facet from the global locale
+    const std::locale global;
+    const CodeCvt &cvt = std::use_facet<CodeCvt>(global);
+
+    // when dst is null only count: convert into a small scratch buffer
+    // that is overwritten on every iteration, and ignore size
+    wchar_t buf [4];
+    const bool counting = 0 == dst;
+    if (counting) {
+        dst  = buf;
+        size = sizeof buf / sizeof *buf;
+    }
+
+    // set up pointers into the source sequence
+    const char*       from      = src;
+    const char* const from_end  = from + std::strlen (from);
+    const char*       from_next = from;
+
+    // set up pointers into the destination sequence
+    wchar_t*       to      = dst;
+    wchar_t* const to_end  = to + size;
+    wchar_t*       to_next = to;
+
+    // number of non-NUL wide characters stored in destination buffer
+    std::size_t nconv = 0;
+
+    // use a local state when pstate is null (i.e., emulate mbstowcs)
+    std::mbstate_t state = std::mbstate_t ();
+    if (0 == pstate)
+        pstate = &state;
+
+    for ( ; from_next != from_end && to != to_end;
+          from = from_next, to = counting ? dst : to_next) {
+        // convert a (sub)sequence of the source buffer into
+        // the destination buffer
+        const std::codecvt_base::result res =
+            cvt.in (*pstate,
+                    from, from_end, from_next,
+                    to, to_end, to_next);
+
+        // verify the consistency of the xxx_next pointers
+        assert (from <= from_next && from_next <= from_end);
+        assert (to <= to_next && to_next <= to_end);
+
+        // process conversion result
+        switch (res) {
+        case std::codecvt_base::error:
+            errno = EILSEQ;   // conversion error
+            return std::size_t (-1);
+
+        case std::codecvt_base::noconv:
+            // only codecvt<T, T> (i.e., facets where intern_type and
+            // extern_type are identical) is allowed to return noconv;
+            // treat it as an error: the facet is incorrectly implemented
+            return std::size_t (-1);
+
+        case std::codecvt_base::partial:
+            // partial conversion: unless the destination buffer has been
+            // filled up the source ends in an incomplete character
+            if (to_next != to_end) {
+                errno = EILSEQ;
+                return std::size_t (-1);
+            }
+            nconv += to_next - to;
+            break;
+
+        case std::codecvt_base::ok:
+            // complete conversion of an initial subsequence (but not
+            // necessarily all) of the source buffer
+            nconv += to_next - to;
+            break;
+        }
+    }
+
+    // NUL-terminate the caller's buffer when the entire source has been
+    // converted and there is room left, as mbsrtowcs() does
+    if (!counting && from_next == from_end && to != to_end)
+        *to = L'\0';
+
+    return nconv;
+}
+
+
+// Converts a few UTF-8 sample strings to wide characters by way of
+// my_mbsrtowcs() and writes each result next to its source bytes.
+int main ()
+{
+    static const char* const mbs [] = {
+        "a", "abc",
+        // <U0391>: Greek letter Alpha
+        "\xce\x91",
+        // <U0391><U0391><U0392>: Greek letters Alpha Alpha Beta
+        "\xce\x91\xce\x91\xce\x92",
+        // <U0391><U0392><U0393>: Greek letters Alpha Beta Gamma
+        "\xce\x91\xce\x92\xce\x93",
+        // <U0966>: Devanagari digit 0
+        "\xe0\xa5\xa6",
+        // <U0967><U0966>: Devanagari digits 10
+        "\xe0\xa5\xa7\xe0\xa5\xa6",
+        // <U0968><U0967><U0966>: Devanagari digits 210
+        "\xe0\xa5\xa8\xe0\xa5\xa7\xe0\xa5\xa6"
+    };
+
+    typedef std::codecvt_byname<wchar_t, char, std::mbstate_t> CodeCvt;
+
+    // create a UCS/UTF-8 codecvt facet and install it in a locale
+    const std::locale utf (std::locale (""), new CodeCvt ("UTF-8@UCS"));
+
+    // set the global locale to use the UCS/UTF-8 codecvt facet
+    std::locale::global (utf);
+
+    // iterate over examples of UTF-8 sequences and output the wide
+    // character sequence each converts to
+    for (std::size_t i = 0; i != sizeof mbs / sizeof *mbs; ++i) {
+        // initialize state to the initial shift state
+        std::mbstate_t state = std::mbstate_t ();
+
+        // obtain the length of the wide character sequence
+        // corresponding to the multibyte source sequence,
+        // not including the terminating NUL
+        const std::size_t length =
+            my_mbsrtowcs (&state, 0, mbs [i], std::size_t (-1));
+
+        if (std::size_t (-1) == length) {
+            std::cerr << "Error computing length of destination sequence.\n";
+            continue;
+        }
+
+        // allocate a zero-filled wide character buffer large enough to hold
+        // the converted sequence; the zero fill provides the terminating NUL
+        wchar_t* const dst = new wchar_t [length + 1] ();
+
+        // reset state to the initial shift state
+        state = std::mbstate_t ();
+
+        // convert the narrow character source sequence into
+        // the wide character buffer
+        const std::size_t nconv =
+            my_mbsrtowcs (&state, dst, mbs [i], length + 1);
+
+        if (length != nconv) {
+            std::cerr << "Error converting source sequence.\n";
+            delete[] dst;   // don't leak the buffer on the error path
+            continue;
+        }
+
+        // write out the wide and the narrow sequences
+        std::cout << "UCS-2 (" << std::dec << length << "): " << std::hex;
+
+        for (const wchar_t *pwc = dst; *pwc != L'\0'; ++pwc)
+            std::cout << "U+" << unsigned (*pwc) << ' ';
+
+        std::cout << " ==> UTF-8: ";
+
+        typedef unsigned char UChar;
+
+        for (const char *pc = mbs [i]; *pc; ++pc)
+            std::cout << "\\x" << int (UChar (*pc));
+
+        std::cout << "\"\n";
+
+        delete[] dst;
+    }
+
+    return 0;
+}

Propchange: incubator/stdcxx/trunk/examples/manual/mbsrtowcs.cpp
------------------------------------------------------------------------------
    svn:keywords = Id

Added: incubator/stdcxx/trunk/examples/manual/out/mbsrtowcs.out
URL: http://svn.apache.org/viewvc/incubator/stdcxx/trunk/examples/manual/out/mbsrtowcs.out?view=auto&rev=535564
==============================================================================
--- incubator/stdcxx/trunk/examples/manual/out/mbsrtowcs.out (added)
+++ incubator/stdcxx/trunk/examples/manual/out/mbsrtowcs.out Sat May  5 14:46:42 2007
@@ -0,0 +1,8 @@
+UCS-2 (1): U+61  ==> UTF-8: \x61"
+UCS-2 (3): U+61 U+62 U+63  ==> UTF-8: \x61\x62\x63"
+UCS-2 (1): U+391  ==> UTF-8: \xce\x91"
+UCS-2 (3): U+391 U+391 U+392  ==> UTF-8: \xce\x91\xce\x91\xce\x92"
+UCS-2 (3): U+391 U+392 U+393  ==> UTF-8: \xce\x91\xce\x92\xce\x93"
+UCS-2 (1): U+966  ==> UTF-8: \xe0\xa5\xa6"
+UCS-2 (2): U+967 U+966  ==> UTF-8: \xe0\xa5\xa7\xe0\xa5\xa6"
+UCS-2 (3): U+968 U+967 U+966  ==> UTF-8: \xe0\xa5\xa8\xe0\xa5\xa7\xe0\xa5\xa6"