You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stdcxx.apache.org by se...@apache.org on 2006/08/30 01:15:06 UTC

svn commit: r438284 - in /incubator/stdcxx/trunk/util: collate.cpp locale.cpp scanner.cpp scanner.h

Author: sebor
Date: Tue Aug 29 16:15:06 2006
New Revision: 438284

URL: http://svn.apache.org/viewvc?rev=438284&view=rev
Log:
2006-08-29  Martin Sebor  <se...@roguewave.com>

	STDCXX-277
	* locale.cpp (print_weight): Formatted weights with value greater
	than UCHAR_MAX as multibyte characters of value UCHAR_MAX or less.
	(write_coll_info): Introduced a convenience typedef and simplified.
	* collate.cpp (get_weight): Handled weights formatted as multibyte
	characters (using escape sequences).
	* scanner.h (convert_escape): Added an argument.
	* scanner.cpp (convert_escape): Handled multibyte characters.

Modified:
    incubator/stdcxx/trunk/util/collate.cpp
    incubator/stdcxx/trunk/util/locale.cpp
    incubator/stdcxx/trunk/util/scanner.cpp
    incubator/stdcxx/trunk/util/scanner.h

Modified: incubator/stdcxx/trunk/util/collate.cpp
URL: http://svn.apache.org/viewvc/incubator/stdcxx/trunk/util/collate.cpp?rev=438284&r1=438283&r2=438284&view=diff
==============================================================================
--- incubator/stdcxx/trunk/util/collate.cpp (original)
+++ incubator/stdcxx/trunk/util/collate.cpp Tue Aug 29 16:15:06 2006
@@ -6,16 +6,23 @@
  *
  ***************************************************************************
  *
- * Copyright (c) 1994-2005 Quovadx,  Inc., acting through its  Rogue Wave
- * Software division. Licensed under the Apache License, Version 2.0 (the
- * "License");  you may  not use this file except  in compliance with the
- * License.    You    may   obtain   a   copy   of    the   License    at
- * http://www.apache.org/licenses/LICENSE-2.0.    Unless   required    by
- * applicable law  or agreed to  in writing,  software  distributed under
- * the License is distributed on an "AS IS" BASIS,  WITHOUT WARRANTIES OR
- * CONDITIONS OF  ANY KIND, either  express or implied.  See  the License
- * for the specific language governing permissions  and limitations under
- * the License.
+ * Licensed to the Apache Software  Foundation (ASF) under one or more
+ * contributor  license agreements.  See  the NOTICE  file distributed
+ * with  this  work  for  additional information  regarding  copyright
+ * ownership.   The ASF  licenses this  file to  you under  the Apache
+ * License, Version  2.0 (the  "License"); you may  not use  this file
+ * except in  compliance with the License.   You may obtain  a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the  License is distributed on an  "AS IS" BASIS,
+ * WITHOUT  WARRANTIES OR CONDITIONS  OF ANY  KIND, either  express or
+ * implied.   See  the License  for  the  specific language  governing
+ * permissions and limitations under the License.
+ *
+ * Copyright 2001-2006 Rogue Wave Software.
  * 
  **************************************************************************/
 
@@ -2020,28 +2027,34 @@
     else if (w.token == Scanner::tok_decimal_value 
              || w.token == Scanner::tok_hex_value
              || w.token == Scanner::tok_octal_value) {
-        // the weights are given in decimal form so get the value 
-        // and assign it to the weights 
-        int val;
-        const char* next_val = std::strchr(w.name.c_str(),
-                                           scanner_.escape_char ());
-        const char* next_weight = std::strchr (w.name.c_str(), ';');
+        // the weight is given in numerical form
+        const char* next_val =
+            std::strchr (w.name.c_str (), scanner_.escape_char ());
+
+        assert (0 != next_val);
+
+        const char* next_wt = std::strchr (w.name.c_str (), ';');
+
         while (weight_num < collate_out_.num_weights) {
-            int c = 0;
-            for ( c = 0; 
-                  0 != next_val && (0 == next_weight 
-                                    || next_val < next_weight);c++) {
-                val = scanner_.convert_escape (next_val);
 
-                weights[weight_num].weight[c] = val;
-                next_val = std::strchr(next_val + 1, scanner_.escape_char ());
+            std::size_t c;
+
+            for (c = 0; *next_val && (!next_wt || next_val < next_wt); ++c) {
+
+                const char* end = 0;
+
+                weights [weight_num].weight [c] =
+                    scanner_.convert_escape (next_val, &end, true);
+
+                assert (0 != end);
+
+                next_val = end;
             }
-            weights[weight_num].size = c;
 
-            weight_num++;
+            weights [weight_num++].size = c;
 
-            if (0 != next_weight && 0 != next_val)
-                next_weight = std::strchr (next_val, ';');
+            if (next_wt)
+                next_wt = std::strchr (next_val, ';');
                 
         }
 
@@ -2052,16 +2065,19 @@
         weights[weight_num].weight[0] = 0;
     }
     else if (w.token == Scanner::tok_string) {
-        // the weights are given in symbolic name form so
-        // extract the string inside quotes
-        std::string tmp (w.name.substr (1, w.name.size() - 2));
+        // the weights are given either in symbolic name form (e.g.,
+        // "<symbolic-name>") or in the form of a quoted multibyte
+        // character string (e.g., "\001\d010\x16")
+        const std::string tmp (w.name.substr (1, w.name.size() - 2));
 
         // keeps track of the length of the weight
         unsigned char k = 0;
 
         // iterate thru the string content and retrieve the symbols
-        std::string::iterator it = tmp.begin ();
-        char ec = scanner_.escape_char ();
+        std::string::const_iterator it = tmp.begin ();
+
+        const char escape = scanner_.escape_char ();
+
         while (it != tmp.end ()) {
             // weight in string form
             std::string wsym;
@@ -2069,7 +2085,7 @@
             // next comes a symbolic name
             if (*it == '<') {
                 while (*it != '>') {
-                    if (*it == ec) 
+                    if (*it == escape) 
                         it++;
 
                     wsym += *it++;
@@ -2113,20 +2129,23 @@
                                 "symbolic name %s not found\n", 
                                 wsym.c_str());
                 }
-            } else if (*it == ec) {
-                std::string wstr (it, tmp.end ());
-                std::string::size_type idx;
-                
-                do{ 
-                    weights [weight_num].weight [k++] =
-                        scanner_.convert_escape (wstr.c_str ());
-
-                    idx = wstr.find (scanner_.escape_char (), 1);
-                    if (std::string::npos != idx )
-                        wstr = wstr.substr (idx);
-                } while (std::string::npos != idx);
+            }
+            else if (*it == escape) {
+
+                // weight is given in a quoted escape form
+                const char* const beg = tmp.c_str () + (it - tmp.begin ());
+                const char*       end = 0;
+
+                weights [weight_num].weight [k++] =
+                    scanner_.convert_escape (beg, &end, true);
+
+                assert (0 != end);
+
+                it += end - beg;
+
                 break;
-            } else 
+            }
+            else 
                 issue_diag (E_SYNTAX, true, &w,
                             "illegal string content as a weight");
         }
@@ -2174,4 +2193,3 @@
         }
     }
 }
-    

Modified: incubator/stdcxx/trunk/util/locale.cpp
URL: http://svn.apache.org/viewvc/incubator/stdcxx/trunk/util/locale.cpp?rev=438284&r1=438283&r2=438284&view=diff
==============================================================================
--- incubator/stdcxx/trunk/util/locale.cpp (original)
+++ incubator/stdcxx/trunk/util/locale.cpp Tue Aug 29 16:15:06 2006
@@ -6,22 +6,23 @@
  *
  ***************************************************************************
  *
- * Copyright 2005-2006 The Apache Software Foundation or its licensors,
- * as applicable.
+ * Licensed to the Apache Software  Foundation (ASF) under one or more
+ * contributor  license agreements.  See  the NOTICE  file distributed
+ * with  this  work  for  additional information  regarding  copyright
+ * ownership.   The ASF  licenses this  file to  you under  the Apache
+ * License, Version  2.0 (the  "License"); you may  not use  this file
+ * except in  compliance with the License.   You may obtain  a copy of
+ * the License at
  *
- * Copyright 2001-2006 Rogue Wave Software.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * distributed under the  License is distributed on an  "AS IS" BASIS,
+ * WITHOUT  WARRANTIES OR CONDITIONS  OF ANY  KIND, either  express or
+ * implied.   See  the License  for  the  specific language  governing
+ * permissions and limitations under the License.
+ *
+ * Copyright 2001-2006 Rogue Wave Software.
  * 
  **************************************************************************/
 
@@ -72,7 +73,12 @@
 
 bool print_cat_names = false;
 bool print_keywords = false;
+
+// set to true in response to the "-h" command line option requesting
+// locale to try to use character names from the original charmap used
+// to create the locale (if possible)
 bool decode = false;
+
 bool is_utf8 = true;
 bool POSIX_output = true;
 
@@ -568,51 +574,61 @@
     return 0;
 }
 
+
 void print_weight (const unsigned int* weightp, 
                    unsigned int        num_weights,
                    unsigned int        longest_weight)
 {
     // FIXME: ignore the order of the element
-    weightp++;
+    ++weightp;
 
-    for (unsigned int k = 0; k < num_weights; k++) {
-        bool quoted = false;
+    for (unsigned k = 0; k != num_weights; ++k) {
 
-        if (*weightp && longest_weight > 1) {
-            quoted = true;
-            std::cout << "\"";
-        }
+        for (unsigned x = 0; x != longest_weight; ++x, ++weightp) {
 
-        for (unsigned int x = 0; x < longest_weight; x++){
             if (*weightp != UINT_MAX) {
-                if (*weightp == 0) 
-                    std::cout << "IGNORE";
-                else
-                    std::cout << "\\d" << *weightp;
-            }
-            weightp++;
-        }
+                if (0 == *weightp) 
+                    std::cout << "IGNORE;";
+                else if (*weightp <= UCHAR_MAX) {
+                    std::cout << "\\d" << *weightp << ';';
+                }
+                else {
+                    std::cout << '"';
+
+                    unsigned wt = *weightp;
+
+                    for (unsigned inx = sizeof wt; wt && inx--; ) {
 
-        if (quoted)
-            std::cout << "\"";
+                        const unsigned byte =
+                            (wt >> CHAR_BIT * inx) & UCHAR_MAX;
 
-        if (k != num_weights)
-            std::cout << ";";
+                        wt &= ~(wt << (CHAR_BIT * inx));
+
+                        if (byte || wt)
+                            std::cout << "\\d" << byte;
+                    }
+                    std::cout << "\";";
+                }
+            }
+        }
     }
 
     std::cout << '\n';
 }
 
+
 void write_coll_info (const std::string &ch, unsigned int idx,
                       unsigned int tab_num)
 {
+    typedef unsigned char UChar;
+
     if (collate_st->num_elms > 1) {
         typedef std::map <std::string, wchar_t>::const_iterator n_cmap2_iter;
         const unsigned int* tab = collate_st->get_n_tab (tab_num);
         unsigned int first = collate_st->get_first_char_in_n_tab(tab_num);
         for (unsigned int i = first; i <= UCHAR_MAX; i++) {
             std::string new_ch = ch;
-            new_ch += (unsigned char)i;
+            new_ch += UChar (i);
             if (tab[i - first] != UINT_MAX) {
                 if (tab[i - first] & 0x80000000) {
                     // it's an offset to another table
@@ -633,10 +649,12 @@
                     }
                     else {
                         for (unsigned int j = 0; j < idx; j++) {
-                            std::cout << "\\d" << (unsigned int)
-                                (unsigned char)new_ch[j];
+                            const UChar uc = UChar (new_ch [j]);
+
+                            std::cout << "\\d" << int (uc);
+
                             if (j != idx - 1)
-                                std::cout << ";";
+                                std::cout << ';';
                         }
                     }
                     std::cout << "  ";

Modified: incubator/stdcxx/trunk/util/scanner.cpp
URL: http://svn.apache.org/viewvc/incubator/stdcxx/trunk/util/scanner.cpp?rev=438284&r1=438283&r2=438284&view=diff
==============================================================================
--- incubator/stdcxx/trunk/util/scanner.cpp (original)
+++ incubator/stdcxx/trunk/util/scanner.cpp Tue Aug 29 16:15:06 2006
@@ -701,50 +701,69 @@
 }
 
 
-unsigned char Scanner::
-convert_escape (const char* esc, const char **pend /* = 0 */) const
+unsigned long Scanner::
+convert_escape (const char  *esc,
+                const char **pend  /* = 0 */,
+                bool         multi /* = false */) const
 {
     assert (0 != esc);
 
-    const char* s = esc;
+    const char escape = escape_char ();
 
-    if (escape_char () != *s)
+    if (escape != *esc)
         issue_diag (E_SYNTAX, true, 0,
                     "expected the escape character ('%c'): %s\n",
-                    escape_char (), esc);
+                    escape, esc);
 
-    int base = 16;
-    const char *basename = "hexadecimal";
+    unsigned long value = 0;
 
-    switch (*++s) {
-    case 'o': base = 8;  basename = "octal"; break;
-    case 'd': base = 10; basename = "decimal"; break;
-    case 'x': break;
-    default:
-        issue_diag (E_SYNTAX, true, 0,
-                    "one of { 'o', 'd', 'x' } expected following "
-                    "the escape character: %s\n", esc);
-    }    
+    for (const char *s = esc; ; ) {
 
-    ++s;
+        // escaped characters are octal by default
+        const char *basename = "octal";
+        int         base     = 8;
 
-    const char *end = 0;
+        switch (*++s) {
+        case 'd': ++s; base = 10; basename = "decimal"; break;
+        case 'x': ++s; base = 16; basename = "hexadecimal"; break;
 
-    if (!pend)
-        pend = &end;
+        case 'o': ++s;
+        case '0': case '1': case '2': case '3':
+        case '4': case '5': case '6': case '7':
+            break;
 
-    const long val = std::strtol (s, (char**)pend, base);
+        default:
+            issue_diag (E_SYNTAX, true, 0,
+                        "one of { 'o', 'd', 'x' } expected following "
+                        "the escape character: %s\n", esc);
+        }
 
-    if (pend == &end && *pend && **pend)
-        issue_diag (E_SYNTAX, true, 0,
-                    "%s constant expected: %s\n", basename, esc);
+        const char *end = 0;
+
+        if (!pend)
+            pend = &end;
+
+        const unsigned long byte = std::strtoul (s, (char**)pend, base);
+
+        if (!multi && pend == &end && **pend)
+            issue_diag (E_SYNTAX, true, 0,
+                        "%s constant expected: %s\n", basename, esc);
+
+        if (UCHAR_MAX < byte)
+            issue_diag (E_INVAL, true, 0,
+                        "%s byte value must be in the range [0, %d]: %s\n",
+                        basename, int (UCHAR_MAX), esc);
+
+        if (value >> (sizeof (unsigned long) - 1) * CHAR_BIT)
+            issue_diag (E_INVAL, true, 0, "integer overflow: %s\n", esc);
+
+        value = (value << CHAR_BIT) | byte;
 
-    if (val < 0 || val > long (UCHAR_MAX))
-        issue_diag (E_INVAL, true, 0,
-                    "%s value in the range [0, %lu) expected: %s\n",
-                    basename, long (UCHAR_MAX), esc);
+        if (**pend != escape || !multi)
+            break;
 
-    typedef unsigned char UChar;
+        s = *pend;
+    }
 
-    return UChar (val);
+    return value;
 }

Modified: incubator/stdcxx/trunk/util/scanner.h
URL: http://svn.apache.org/viewvc/incubator/stdcxx/trunk/util/scanner.h?rev=438284&r1=438283&r2=438284&view=diff
==============================================================================
--- incubator/stdcxx/trunk/util/scanner.h (original)
+++ incubator/stdcxx/trunk/util/scanner.h Tue Aug 29 16:15:06 2006
@@ -2,20 +2,27 @@
  *
  * scanner.h
  *
- * $Id: //stdlib/dev/source/stdlib/util/scanner.h#39 $
+ * $Id$
  *
  ***************************************************************************
  *
- * Copyright (c) 1994-2005 Quovadx,  Inc., acting through its  Rogue Wave
- * Software division. Licensed under the Apache License, Version 2.0 (the
- * "License");  you may  not use this file except  in compliance with the
- * License.    You    may   obtain   a   copy   of    the   License    at
- * http://www.apache.org/licenses/LICENSE-2.0.    Unless   required    by
- * applicable law  or agreed to  in writing,  software  distributed under
- * the License is distributed on an "AS IS" BASIS,  WITHOUT WARRANTIES OR
- * CONDITIONS OF  ANY KIND, either  express or implied.  See  the License
- * for the specific language governing permissions  and limitations under
- * the License.
+ * Licensed to the Apache Software  Foundation (ASF) under one or more
+ * contributor  license agreements.  See  the NOTICE  file distributed
+ * with  this  work  for  additional information  regarding  copyright
+ * ownership.   The ASF  licenses this  file to  you under  the Apache
+ * License, Version  2.0 (the  "License"); you may  not use  this file
+ * except in  compliance with the License.   You may obtain  a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the  License is distributed on an  "AS IS" BASIS,
+ * WITHOUT  WARRANTIES OR CONDITIONS  OF ANY  KIND, either  express or
+ * implied.   See  the License  for  the  specific language  governing
+ * permissions and limitations under the License.
+ *
+ * Copyright 2001-2006 Rogue Wave Software.
  * 
  **************************************************************************/
 
@@ -24,6 +31,7 @@
 
 #include <string>
 #include <stack>
+#include <climits>   // for ULONG_MAX
 
 
 struct ScannerContext;
@@ -103,8 +111,9 @@
     void ignore_line ();
 
     // converts an octal, decimal, or hexadecimal escape sequence
-    // to a numeric value in the range [0, UCHAR_MAX]
-    unsigned char convert_escape (const char*, const char** = 0) const;
+    // (or a multibyte sequence of such escapes) to a numeric value
+    unsigned long
+    convert_escape (const char*, const char** = 0, bool = false) const;
 
 private: