You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stdcxx.apache.org by se...@apache.org on 2006/08/30 01:15:06 UTC
svn commit: r438284 - in /incubator/stdcxx/trunk/util: collate.cpp
locale.cpp scanner.cpp scanner.h
Author: sebor
Date: Tue Aug 29 16:15:06 2006
New Revision: 438284
URL: http://svn.apache.org/viewvc?rev=438284&view=rev
Log:
2006-08-29 Martin Sebor <se...@roguewave.com>
STDCXX-277
* locale.cpp (print_weight): Formatted weights with value greater
than UCHAR_MAX as multibyte characters of value UCHAR_MAX or less.
(write_coll_info): Introduced a convenience typedef and simplified.
* collate.cpp (get_weight): Handled weights formatted as multibyte
characters (using escape sequences).
* scanner.h (convert_escape): Added an argument.
* scanner.cpp (convert_escape): Handled multibyte characters.
Modified:
incubator/stdcxx/trunk/util/collate.cpp
incubator/stdcxx/trunk/util/locale.cpp
incubator/stdcxx/trunk/util/scanner.cpp
incubator/stdcxx/trunk/util/scanner.h
Modified: incubator/stdcxx/trunk/util/collate.cpp
URL: http://svn.apache.org/viewvc/incubator/stdcxx/trunk/util/collate.cpp?rev=438284&r1=438283&r2=438284&view=diff
==============================================================================
--- incubator/stdcxx/trunk/util/collate.cpp (original)
+++ incubator/stdcxx/trunk/util/collate.cpp Tue Aug 29 16:15:06 2006
@@ -6,16 +6,23 @@
*
***************************************************************************
*
- * Copyright (c) 1994-2005 Quovadx, Inc., acting through its Rogue Wave
- * Software division. Licensed under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the
- * License. You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0. Unless required by
- * applicable law or agreed to in writing, software distributed under
- * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, either express or implied. See the License
- * for the specific language governing permissions and limitations under
- * the License.
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ * Copyright 2001-2006 Rogue Wave Software.
*
**************************************************************************/
@@ -2020,28 +2027,34 @@
else if (w.token == Scanner::tok_decimal_value
|| w.token == Scanner::tok_hex_value
|| w.token == Scanner::tok_octal_value) {
- // the weights are given in decimal form so get the value
- // and assign it to the weights
- int val;
- const char* next_val = std::strchr(w.name.c_str(),
- scanner_.escape_char ());
- const char* next_weight = std::strchr (w.name.c_str(), ';');
+ // the weight is given in numerical form
+ const char* next_val =
+ std::strchr (w.name.c_str (), scanner_.escape_char ());
+
+ assert (0 != next_val);
+
+ const char* next_wt = std::strchr (w.name.c_str (), ';');
+
while (weight_num < collate_out_.num_weights) {
- int c = 0;
- for ( c = 0;
- 0 != next_val && (0 == next_weight
- || next_val < next_weight);c++) {
- val = scanner_.convert_escape (next_val);
- weights[weight_num].weight[c] = val;
- next_val = std::strchr(next_val + 1, scanner_.escape_char ());
+ std::size_t c;
+
+ for (c = 0; *next_val && (!next_wt || next_val < next_wt); ++c) {
+
+ const char* end = 0;
+
+ weights [weight_num].weight [c] =
+ scanner_.convert_escape (next_val, &end, true);
+
+ assert (0 != end);
+
+ next_val = end;
}
- weights[weight_num].size = c;
- weight_num++;
+ weights [weight_num++].size = c;
- if (0 != next_weight && 0 != next_val)
- next_weight = std::strchr (next_val, ';');
+ if (next_wt)
+ next_wt = std::strchr (next_val, ';');
}
@@ -2052,16 +2065,19 @@
weights[weight_num].weight[0] = 0;
}
else if (w.token == Scanner::tok_string) {
- // the weights are given in symbolic name form so
- // extract the string inside quotes
- std::string tmp (w.name.substr (1, w.name.size() - 2));
+ // the weights are given either in symbolic name form (e.g.,
+ // "<symbolic-name>") or in the form of a quoted multibyte
+ // character string (e.g., "\001\d010\x16\")
+ const std::string tmp (w.name.substr (1, w.name.size() - 2));
// keeps track of the length of the weight
unsigned char k = 0;
// iterate thru the string content and retrieve the symbols
- std::string::iterator it = tmp.begin ();
- char ec = scanner_.escape_char ();
+ std::string::const_iterator it = tmp.begin ();
+
+ const char escape = scanner_.escape_char ();
+
while (it != tmp.end ()) {
// weight in string form
std::string wsym;
@@ -2069,7 +2085,7 @@
// next comes a symbolic name
if (*it == '<') {
while (*it != '>') {
- if (*it == ec)
+ if (*it == escape)
it++;
wsym += *it++;
@@ -2113,20 +2129,23 @@
"symbolic name %s not found\n",
wsym.c_str());
}
- } else if (*it == ec) {
- std::string wstr (it, tmp.end ());
- std::string::size_type idx;
-
- do{
- weights [weight_num].weight [k++] =
- scanner_.convert_escape (wstr.c_str ());
-
- idx = wstr.find (scanner_.escape_char (), 1);
- if (std::string::npos != idx )
- wstr = wstr.substr (idx);
- } while (std::string::npos != idx);
+ }
+ else if (*it == escape) {
+
+ // weight is given in a quoted escape form
+ const char* const beg = tmp.c_str () + (it - tmp.begin ());
+ const char* end = 0;
+
+ weights [weight_num].weight [k++] =
+ scanner_.convert_escape (beg, &end, true);
+
+ assert (0 != end);
+
+ it += end - beg;
+
break;
- } else
+ }
+ else
issue_diag (E_SYNTAX, true, &w,
"illegal string content as a weight");
}
@@ -2174,4 +2193,3 @@
}
}
}
-
Modified: incubator/stdcxx/trunk/util/locale.cpp
URL: http://svn.apache.org/viewvc/incubator/stdcxx/trunk/util/locale.cpp?rev=438284&r1=438283&r2=438284&view=diff
==============================================================================
--- incubator/stdcxx/trunk/util/locale.cpp (original)
+++ incubator/stdcxx/trunk/util/locale.cpp Tue Aug 29 16:15:06 2006
@@ -6,22 +6,23 @@
*
***************************************************************************
*
- * Copyright 2005-2006 The Apache Software Foundation or its licensors,
- * as applicable.
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at
*
- * Copyright 2001-2006 Rogue Wave Software.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ * Copyright 2001-2006 Rogue Wave Software.
*
**************************************************************************/
@@ -72,7 +73,12 @@
bool print_cat_names = false;
bool print_keywords = false;
+
+// set to true in response to the "-h" command line option requesting
+// locale to try to use character names from the original charmap used
+// to create the locale (if possible)
bool decode = false;
+
bool is_utf8 = true;
bool POSIX_output = true;
@@ -568,51 +574,61 @@
return 0;
}
+
void print_weight (const unsigned int* weightp,
unsigned int num_weights,
unsigned int longest_weight)
{
// FIXME: ignore the order of the element
- weightp++;
+ ++weightp;
- for (unsigned int k = 0; k < num_weights; k++) {
- bool quoted = false;
+ for (unsigned k = 0; k != num_weights; ++k) {
- if (*weightp && longest_weight > 1) {
- quoted = true;
- std::cout << "\"";
- }
+ for (unsigned x = 0; x != longest_weight; ++x, ++weightp) {
- for (unsigned int x = 0; x < longest_weight; x++){
if (*weightp != UINT_MAX) {
- if (*weightp == 0)
- std::cout << "IGNORE";
- else
- std::cout << "\\d" << *weightp;
- }
- weightp++;
- }
+ if (0 == *weightp)
+ std::cout << "IGNORE;";
+ else if (*weightp <= UCHAR_MAX) {
+ std::cout << "\\d" << *weightp << ';';
+ }
+ else {
+ std::cout << '"';
+
+ unsigned wt = *weightp;
+
+ for (unsigned inx = sizeof wt; wt && inx--; ) {
- if (quoted)
- std::cout << "\"";
+ const unsigned byte =
+ (wt >> CHAR_BIT * inx) & UCHAR_MAX;
- if (k != num_weights)
- std::cout << ";";
+ wt &= ~(wt << (CHAR_BIT * inx));
+
+ if (byte || wt)
+ std::cout << "\\d" << byte;
+ }
+ std::cout << "\";";
+ }
+ }
+ }
}
std::cout << '\n';
}
+
void write_coll_info (const std::string &ch, unsigned int idx,
unsigned int tab_num)
{
+ typedef unsigned char UChar;
+
if (collate_st->num_elms > 1) {
typedef std::map <std::string, wchar_t>::const_iterator n_cmap2_iter;
const unsigned int* tab = collate_st->get_n_tab (tab_num);
unsigned int first = collate_st->get_first_char_in_n_tab(tab_num);
for (unsigned int i = first; i <= UCHAR_MAX; i++) {
std::string new_ch = ch;
- new_ch += (unsigned char)i;
+ new_ch += UChar (i);
if (tab[i - first] != UINT_MAX) {
if (tab[i - first] & 0x80000000) {
// it's an offset to another table
@@ -633,10 +649,12 @@
}
else {
for (unsigned int j = 0; j < idx; j++) {
- std::cout << "\\d" << (unsigned int)
- (unsigned char)new_ch[j];
+ const UChar uc = UChar (new_ch [j]);
+
+ std::cout << "\\d" << int (uc);
+
if (j != idx - 1)
- std::cout << ";";
+ std::cout << ';';
}
}
std::cout << " ";
Modified: incubator/stdcxx/trunk/util/scanner.cpp
URL: http://svn.apache.org/viewvc/incubator/stdcxx/trunk/util/scanner.cpp?rev=438284&r1=438283&r2=438284&view=diff
==============================================================================
--- incubator/stdcxx/trunk/util/scanner.cpp (original)
+++ incubator/stdcxx/trunk/util/scanner.cpp Tue Aug 29 16:15:06 2006
@@ -701,50 +701,69 @@
}
-unsigned char Scanner::
-convert_escape (const char* esc, const char **pend /* = 0 */) const
+unsigned long Scanner::
+convert_escape (const char *esc,
+ const char **pend /* = 0 */,
+ bool multi /* = false */) const
{
assert (0 != esc);
- const char* s = esc;
+ const char escape = escape_char ();
- if (escape_char () != *s)
+ if (escape != *esc)
issue_diag (E_SYNTAX, true, 0,
"expected the escape character ('%c'): %s\n",
- escape_char (), esc);
+ escape, esc);
- int base = 16;
- const char *basename = "hexadecimal";
+ unsigned long value = 0;
- switch (*++s) {
- case 'o': base = 8; basename = "octal"; break;
- case 'd': base = 10; basename = "decimal"; break;
- case 'x': break;
- default:
- issue_diag (E_SYNTAX, true, 0,
- "one of { 'o', 'd', 'x' } expected following "
- "the escape character: %s\n", esc);
- }
+ for (const char *s = esc; ; ) {
- ++s;
+ // escaped characters are octal by default
+ const char *basename = "octal";
+ int base = 8;
- const char *end = 0;
+ switch (*++s) {
+ case 'd': ++s; base = 10; basename = "decimal"; break;
+ case 'x': ++s; base = 16; basename = "hexadecimal"; break;
- if (!pend)
- pend = &end;
+ case 'o': ++s;
+ case '0': case '1': case '2': case '3':
+ case '4': case '5': case '6': case '7':
+ break;
- const long val = std::strtol (s, (char**)pend, base);
+ default:
+ issue_diag (E_SYNTAX, true, 0,
+ "one of { 'o', 'd', 'x' } expected following "
+ "the escape character: %s\n", esc);
+ }
- if (pend == &end && *pend && **pend)
- issue_diag (E_SYNTAX, true, 0,
- "%s constant expected: %s\n", basename, esc);
+ const char *end = 0;
+
+ if (!pend)
+ pend = &end;
+
+ const unsigned long byte = std::strtoul (s, (char**)pend, base);
+
+ if (!multi && pend == &end && **pend)
+ issue_diag (E_SYNTAX, true, 0,
+ "%s constant expected: %s\n", basename, esc);
+
+ if (UCHAR_MAX < byte)
+ issue_diag (E_INVAL, true, 0,
+ "%s byte value must be in the range [0, %d]: %s\n",
+ basename, int (UCHAR_MAX), esc);
+
+ if (value >> (sizeof (unsigned long) - 1) * CHAR_BIT)
+ issue_diag (E_INVAL, true, 0, "integer overflow: %s\n", esc);
+
+ value = (value << CHAR_BIT) | byte;
- if (val < 0 || val > long (UCHAR_MAX))
- issue_diag (E_INVAL, true, 0,
- "%s value in the range [0, %lu) expected: %s\n",
- basename, long (UCHAR_MAX), esc);
+ if (**pend != escape || !multi)
+ break;
- typedef unsigned char UChar;
+ s = *pend;
+ }
- return UChar (val);
+ return value;
}
Modified: incubator/stdcxx/trunk/util/scanner.h
URL: http://svn.apache.org/viewvc/incubator/stdcxx/trunk/util/scanner.h?rev=438284&r1=438283&r2=438284&view=diff
==============================================================================
--- incubator/stdcxx/trunk/util/scanner.h (original)
+++ incubator/stdcxx/trunk/util/scanner.h Tue Aug 29 16:15:06 2006
@@ -2,20 +2,27 @@
*
* scanner.h
*
- * $Id: //stdlib/dev/source/stdlib/util/scanner.h#39 $
+ * $Id$
*
***************************************************************************
*
- * Copyright (c) 1994-2005 Quovadx, Inc., acting through its Rogue Wave
- * Software division. Licensed under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the
- * License. You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0. Unless required by
- * applicable law or agreed to in writing, software distributed under
- * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, either express or implied. See the License
- * for the specific language governing permissions and limitations under
- * the License.
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ * Copyright 2001-2006 Rogue Wave Software.
*
**************************************************************************/
@@ -24,6 +31,7 @@
#include <string>
#include <stack>
+#include <climits> // for ULONG_MAX
struct ScannerContext;
@@ -103,8 +111,9 @@
void ignore_line ();
// converts an octal, decimal, or hexadecimal escape sequence
- // to a numeric value in the range [0, UCHAR_MAX]
- unsigned char convert_escape (const char*, const char** = 0) const;
+ // (or a multibyte sequence of such things) to a numeric value
+ unsigned long
+ convert_escape (const char*, const char** = 0, bool = false) const;
private: