You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stdcxx.apache.org by vi...@apache.org on 2008/03/27 23:25:58 UTC
svn commit: r642006 - in /stdcxx/trunk/tests: include/rw_locale.h src/locale.cpp

Author: vitek
Date: Thu Mar 27 15:25:50 2008
New Revision: 642006

URL: http://svn.apache.org/viewvc?rev=642006&view=rev
Log:

2008-03-27  Travis Vitek  <vi...@roguewave.com>

	STDCXX-716
	* tests/include/rw_locale.h (rw_locale_query): Add declaration
	of new function.
	* tests/src/locale.cpp (atexit_rm_locale_root): Temporarily disable
	diagnostic messages to avoid warnings that occur because the function
	is called after run_test() has returned.
	(rw_locale_query): Add new function to query installed locales by
	language, country, codeset or MB_CUR_MAX.


Modified:
    stdcxx/trunk/tests/include/rw_locale.h
    stdcxx/trunk/tests/src/locale.cpp

Modified: stdcxx/trunk/tests/include/rw_locale.h
URL: http://svn.apache.org/viewvc/stdcxx/trunk/tests/include/rw_locale.h?rev=642006&r1=642005&r2=642006&view=diff
==============================================================================
--- stdcxx/trunk/tests/include/rw_locale.h (original)
+++ stdcxx/trunk/tests/include/rw_locale.h Thu Mar 27 15:25:50 2008
@@ -36,6 +36,13 @@
 #define _UNUSED_CAT 69
 
 
+// get a list of all native locales whose canonical name matches the
+// query (<language>-<COUNTRY>-<MB_CUR_MAX>-<CODESET>, see locale.cpp)
+_TEST_EXPORT char*
+rw_locale_query (int = _UNUSED_CAT, const char* = 0, _RWSTD_SIZE_T = 0);
+
+
+// this function will likely be deprecated!
 _TEST_EXPORT char*
 rw_locales (int = _UNUSED_CAT, const char* = 0, bool = true);
 

Modified: stdcxx/trunk/tests/src/locale.cpp
URL: http://svn.apache.org/viewvc/stdcxx/trunk/tests/src/locale.cpp?rev=642006&r1=642005&r2=642006&view=diff
==============================================================================
--- stdcxx/trunk/tests/src/locale.cpp (original)
+++ stdcxx/trunk/tests/src/locale.cpp Thu Mar 27 15:25:50 2008
@@ -36,6 +36,8 @@
 #include <file.h>         // for SHELL_RM_RF, rw_tmpnam
 #include <rw_process.h>   // for rw_system()
 #include <rw_printf.h>    // for rw_snprintf()
+#include <rw_fnmatch.h>   // for rw_fnmatch()
+#include <rw_braceexp.h>  // for rw_shell_expand()
 #include <driver.h>       // for rw_error()
 
 #ifdef _RWSTD_OS_LINUX
@@ -79,12 +81,15 @@
 #  ifndef LC_MESSAGES
 #    define LC_MESSAGES _RWSTD_LC_MESSAGES
 #  endif   // LC_MESSAGES
-#  include <langinfo.h>
 #  define EXE_SUFFIX    ""
 #else   // if MSVC
+#  define _RWSTD_NO_LANGINFO
 #  define EXE_SUFFIX    ".exe"
 #endif  // _MSC_VER
 
+#ifndef _RWSTD_NO_LANGINFO
+#  include <langinfo.h>
+#endif
 
 #if !defined (PATH_MAX) || PATH_MAX < 128 || 4096 < PATH_MAX
    // deal  with undefined, bogus, or excessive values
@@ -288,8 +293,14 @@
 
 static void atexit_rm_locale_root ()
 {
+    const bool e = rw_enable (rw_error, false);   // turn rw_error off; e is handed back to rw_enable() below
+    const bool n = rw_enable (rw_note , false);   // turn rw_note off; n is handed back to rw_enable() below
+
     // remove temporary locale databases created by the test
     rw_system (SHELL_RM_RF "%s", rw_locale_root);
+
+    rw_enable (rw_note , n);   // NOTE(review): presumably restores the previous setting -- confirm
+    rw_enable (rw_error, e);   // NOTE(review): presumably restores the previous setting -- confirm
 }
 
 }
@@ -388,16 +399,16 @@
         return deflocname;   // error
     }
 
-    // make sure that grep_exp is <= 80 
+    // make sure that grep_exp is <= 80
     if (grep_exp && 80 < strlen (grep_exp)) {
         abort ();
     }
 
     // execute a shell command and redirect its output into the file
     const int exit_status =
-          grep_exp && *grep_exp
-        ? rw_system ("locale -a | grep \"%s\" > %s", grep_exp, fname)
-        : rw_system ("locale -a > %s", fname);
+        grep_exp && *grep_exp
+      ? rw_system ("locale -a | grep \"%s\" > %s", grep_exp, fname)
+      : rw_system ("locale -a > %s", fname);
 
     if (exit_status) {
         return deflocname;   // error
@@ -930,4 +941,701 @@
     remove (catname);
 
     return ret;
+}
+
+
+// our locale database is a big array of these, chained through next
+struct _rw_locale_entry {
+    char locale_name    [64]; // native name, e.g. English_United States.1252
+    char canonical_name [32]; // <language>-<COUNTRY>-<MB_CUR_MAX>-<CODESET>, e.g. en-US-1-1252
+    struct _rw_locale_entry* next; // following entry of the list, 0 at the end
+};
+
+// converts one of the 26 basic lowercase letters to uppercase,
+// any other value is returned unchanged
+static int
+_rw_toupper (int chr)
+{
+    // the letters are matched one by one rather than with a range test
+    static const char lower [] = "abcdefghijklmnopqrstuvwxyz";
+
+    for (const char* p = lower; *p; ++p) {
+        if (chr == *p)
+            return chr - 'a' + 'A';
+    }
+
+    // not a lowercase letter
+    return chr;
+}
+
+// converts one of the 26 basic uppercase letters to lowercase,
+// any other value is returned unchanged
+static int
+_rw_tolower (int chr)
+{
+    // the letters are matched one by one rather than with a range test
+    static const char upper [] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+
+    for (const char* p = upper; *p; ++p) {
+        if (chr == *p)
+            return chr - 'A' + 'a';
+    }
+
+    // not an uppercase letter
+    return chr;
+}
+
+// returns 1 for carriage return, newline, tab and blank, 0 otherwise
+static int
+_rw_isspace (int chr)
+{
+    const bool is_space =
+        ' ' == chr || '\t' == chr || '\n' == chr || '\r' == chr;
+
+    return is_space ? 1 : 0;
+}
+
+struct _rw_locale_array {
+    _rw_locale_entry* entries; // first element of the array of locale entries
+    _RWSTD_SIZE_T count;       // number of elements in entries
+};
+
+struct _rw_lookup_entry_t {
+    const char* native;    // name as spelled in a native locale name (sort/search key)
+    const char* canonical; // canonical name the native name maps to
+};
+
+extern "C" {
+
+// qsort()/bsearch() callback: orders lookup entries by their native name
+static int
+_rw_lookup_comparator (const void* _lhs, const void* _rhs)
+{
+    const char* const lhs_name = ((const _rw_lookup_entry_t*)_lhs)->native;
+    const char* const rhs_name = ((const _rw_lookup_entry_t*)_rhs)->native;
+
+    return strcmp (lhs_name, rhs_name);
+}
+
+}  // extern "C"
+
+// table that maps native locale name components to canonical names;
+// it is filled by load_from_file() and searched by get_canonical_name()
+struct _rw_lookup_table_t {
+
+    // creates an empty table
+    _rw_lookup_table_t ()
+        : entries_ (0), count_ (0), table_data_ (0) {
+    }
+
+    // releases the entries and the file data the entries point into
+    ~_rw_lookup_table_t () {
+        // free() accepts null pointers, so no checks are needed
+        free (entries_);
+        free (table_data_);
+
+        entries_    = 0;
+        count_      = 0;
+        table_data_ = 0;
+    }
+
+    // reads the mapping file path + file; a negative upper_or_lower
+    // converts the text to lowercase, a positive one to uppercase
+    bool load_from_file (const char* path, const char* file, int upper_or_lower);
+
+    // returns the first of the get_num_entries() entries
+    const _rw_lookup_entry_t* get_entries () const {
+        return entries_;
+    }
+
+    // returns the number of entries in the table
+    size_t get_num_entries () const {
+        return count_;
+    }
+
+    // returns the canonical name recorded for name, or 0 if there is none
+    const char* get_canonical_name (const char* name) const;
+
+private:
+
+    _rw_lookup_entry_t* entries_;      // entries sorted by native name
+    size_t              count_;        // number of entries
+    char*               table_data_;   // file contents the entries refer to
+
+    // copying is intentionally not available
+    _rw_lookup_table_t (const _rw_lookup_table_t& rhs);
+    _rw_lookup_table_t& operator= (const _rw_lookup_table_t& rhs);
+};
+
+
+// relinks the entries of a so that every entry points at its successor
+// and the last one ends the list, i.e. all locales are included again.
+// an empty array is left untouched.
+static void
+_rw_reset_locales (_rw_locale_array* a)
+{
+    for (size_t i = 0; i < a->count; ++i) {
+        // the last entry terminates the list; doing it here avoids
+        // indexing entry [count - 1] when count is zero
+        a->entries [i].next = i + 1 < a->count ? &a->entries [i + 1] : 0;
+    }
+}
+
+//
+// this function gets a list of all of the locales that are installed. it
+// only queries the system once (by running `locale -a') and caches the
+// result for use in future requests.
+//
+// each entry holds the native locale name and the canonical name
+// <language>-<COUNTRY>-<MB_CUR_MAX>-<CODESET> derived from it. the "C"
+// and "POSIX" locales are left out. when no usable locale is found the
+// result is a single "C" fallback entry.
+//
+// NOTE: the function calls setlocale (LC_CTYPE, ...) for every locale it
+// examines and does not restore the original locale.
+//
+static _rw_locale_array
+_rw_all_locales ()
+{
+    static _rw_locale_array result;
+
+    // if we already have a collection, reuse it
+    if (result.entries && result.count != 0) {
+        _rw_reset_locales (&result);
+        return result;
+    }
+
+    static _rw_locale_entry fallback = {
+        "C", "C", 0
+    };
+
+    result.entries = &fallback;
+    result.count   = 1;
+
+    const char* const fname = rw_tmpnam (0);
+    if (!fname) {
+        return result;
+    }
+
+    const int status = rw_system ("locale -a > %s", fname);
+    if (status) {
+        // the redirection may have created the file anyway
+        remove (fname);
+        return result;
+    }
+
+    FILE* const file = fopen (fname, "r");
+    if (!file) {
+        remove (fname);
+        return result;
+    }
+
+    // looks to be the first time, get a list of all locales
+    const size_t entry_size = sizeof (_rw_locale_entry);
+    const size_t grow_size  = 64;
+
+    _rw_locale_entry* entries = 0;
+    size_t capacity = 0;
+    size_t size     = 0;
+
+    // set when the array of entries could not be grown
+    bool out_of_memory = false;
+
+    // load the native to canonical lookup tables
+    _rw_lookup_table_t languages_map;
+    _rw_lookup_table_t countries_map;
+    _rw_lookup_table_t encodings_map;
+
+    // use TOPDIR to determine the root of the source tree
+    const char* const topdir = getenv (TOPDIR);
+    if (!topdir || !*topdir) {
+        rw_error (0, __FILE__, __LINE__,
+                  "the environment variable %s is %s",
+                  TOPDIR, topdir ? "empty" : "undefined");
+    }
+    else {
+        // we should be loading this from some other well
+        // known path so we don't depend on $TOPDIR. sadly
+        // __FILE__ is not an absolute path on msvc
+
+        char path [PATH_MAX];
+
+        // sizeof of the literal accounts for the terminating null
+        if (sizeof (path) < strlen (topdir) + sizeof (SLASH RELPATH SLASH)) {
+            rw_error (0, __FILE__, __LINE__,
+                      "the value of the environment variable %s "
+                      "is too long", TOPDIR);
+        }
+        else {
+            strcpy (path, topdir);
+            strcat (path, SLASH RELPATH SLASH);
+
+            // load mapping from local to canonical names
+            languages_map.load_from_file (path, "languages", -1);
+            countries_map.load_from_file (path, "countries",  1);
+            encodings_map.load_from_file (path, "encodings",  1);
+        }
+    }
+
+    char locale [128];
+    while (fgets (locale, sizeof (locale), file)) {
+
+        // strip the line terminator, but only if there is one
+        size_t len = strlen (locale);
+        while (len && ('\n' == locale [len - 1] || '\r' == locale [len - 1]))
+            locale [--len] = '\0';
+
+        if (!len)
+            continue;   // nothing on this line
+
+        // ensure sufficient space in array
+        if (! (size < capacity)) {
+
+            _rw_locale_entry* new_entries =
+                _RWSTD_STATIC_CAST(_rw_locale_entry*,
+                    _QUIET_MALLOC (entry_size * (capacity + grow_size)));
+            if (!new_entries) {
+                out_of_memory = true;
+                break;   // clean up below
+            }
+
+            // there is nothing to copy (and no source) the first time
+            if (size)
+                memcpy (new_entries, entries, entry_size * size);
+
+            // deallocate the old buffer
+            _QUIET_FREE (entries);
+
+            entries   = new_entries;
+            capacity += grow_size;
+        }
+
+        // grab entry to update
+        _rw_locale_entry* const entry = &entries [size];
+        entry->next = 0;
+
+        // we need MB_CUR_MAX and CODESET
+        if (!setlocale (LC_CTYPE, locale)) {
+
+            // the entry has not been filled in yet, report the line
+            rw_note (0, __FILE__, __LINE__,
+                "setlocale() failed for '%s'",
+                locale);
+
+            continue;
+        }
+
+        if (! (len < sizeof (entry->locale_name))) {
+
+            rw_note (0, __FILE__, __LINE__,
+                "locale name '%s' was to long for fixed buffer",
+                locale);
+
+            continue; // locale name didn't fit, so we skip it
+        }
+
+        if (!strcmp (locale, "C") || !strcmp (locale, "POSIX"))
+            continue; // we don't do C/POSIX locale
+
+#ifndef _RWSTD_NO_LANGINFO
+        char codeset [40];
+
+        // uppercase copy of the codeset name, truncated to fit
+        size_t i = 0;
+        for (const char* charset = nl_langinfo (CODESET);
+             *charset && i + 1 < sizeof (codeset);
+             ++charset) {
+            codeset [i++] = _rw_toupper (*charset);
+        }
+
+        codeset [i] = '\0';
+#endif
+
+        // copy the locale name
+        strcpy (entry->locale_name, locale);
+
+        // attempt to split line into parts
+        char* extension = strrchr (locale, '@');
+        if (extension) {
+            *extension++ = '\0';
+        }
+
+        char* encoding = strrchr (locale, '.');
+        if (encoding) {
+            *encoding++ = '\0';
+
+            for (int n = 0; encoding [n]; ++n)
+                encoding [n] = _rw_toupper (encoding [n]);
+        }
+
+        char* country = strrchr (locale, '_');
+        if (country) {
+            *country++ = '\0';
+
+            for (int n = 0; country [n]; ++n)
+                country [n] = _rw_toupper (country [n]);
+        }
+
+        char* language = locale;
+
+        for (int n = 0; language [n]; ++n)
+            language [n] = _rw_tolower (language [n]);
+
+        // use mapping databases to find the canonical
+        // names for each part of the locale name
+
+        const char* planguage =
+            languages_map.get_canonical_name (language);
+        if (!planguage)
+            planguage = language;
+
+        // if country name was provided, then lookup in the country
+        // mapping. locales without a country are skipped below.
+        const char* pcountry =
+              countries_map.get_canonical_name (country);
+        if (!pcountry)
+            pcountry = country;
+
+#ifndef _RWSTD_NO_LANGINFO
+        const char* pencoding =
+            encodings_map.get_canonical_name (codeset);
+        if (!pencoding)
+            pencoding = codeset;
+#else
+        const char* pencoding =
+            encodings_map.get_canonical_name (encoding);
+        if (!pencoding)
+            pencoding = encoding;
+#endif
+
+        // require all three mappings are valid
+        if (!planguage || !*planguage) {
+
+            rw_note (0, __FILE__, __LINE__,
+                "failed to get language for locale %s",
+                entry->locale_name);
+
+            continue;
+        }
+        else if (!pcountry || !*pcountry) {
+
+            rw_note (0, __FILE__, __LINE__,
+                "failed to get country for locale %s",
+                entry->locale_name);
+
+            continue;
+        }
+        else if (!pencoding || !*pencoding) {
+
+            rw_note (0, __FILE__, __LINE__,
+                "failed to get codeset for locale %s",
+                entry->locale_name);
+
+            continue;
+        }
+
+        // MB_CUR_MAX has type size_t, convert it to match %d
+        char mb_cur_max [16];
+        sprintf (mb_cur_max, "%d", _RWSTD_STATIC_CAST (int, MB_CUR_MAX));
+
+        // three separators plus the four parts must fit the buffer
+        const size_t canonical_len =   strlen (planguage)
+                                     + strlen (pcountry)
+                                     + strlen (mb_cur_max)
+                                     + strlen (pencoding) + 3;
+
+        if (! (canonical_len < sizeof (entry->canonical_name))) {
+
+            rw_note (0, __FILE__, __LINE__,
+                "canonical name of locale %s was too long for fixed buffer",
+                entry->locale_name);
+
+            continue;
+        }
+
+        // the canonical name for lookup
+        sprintf (entry->canonical_name, "%s-%s-%s-%s",
+                 planguage, pcountry, mb_cur_max, pencoding);
+
+        size += 1;
+    }
+
+    fclose (file);
+
+    // delete temp file
+    remove (fname);
+
+    if (out_of_memory || 0 == size) {
+        // keep the fallback: either the list is incomplete or
+        // none of the installed locales was usable
+        _QUIET_FREE (entries);
+    }
+    else {
+        // link all of the nodes into result
+        result.entries = entries;
+        result.count   = size;
+    }
+
+    // link each node to the next. if the array is sorted,
+    // the list will be sorted.
+    _rw_reset_locales (&result);
+
+    return result;
+}
+
+// returns the names of the installed locales whose canonical name
+// matches query.
+//
+//   loc_cat  when not _UNUSED_CAT, only locales that setlocale() accepts
+//            for this category are returned
+//   query    pattern of the form <language>-<COUNTRY>-<MB_CUR_MAX>-<CODESET>;
+//            it is brace expanded and each expansion is matched with
+//            rw_fnmatch(). a null query matches any locale
+//   wanted   maximum number of names to return, 0 means no limit
+//
+// the result is a sequence of null terminated names followed by an
+// additional null. it is kept in a buffer owned by this function that
+// is overwritten by the next call, so the caller must not free it.
+// returns 0 when the query cannot be expanded, the current locale
+// cannot be saved, or no buffer has ever been allocated.
+_TEST_EXPORT char*
+rw_locale_query (int loc_cat, const char* query, size_t wanted)
+{
+    // the null query string will return any locale
+    if (!query)
+        query = "*";
+
+    if (!wanted)
+        wanted = _RWSTD_SIZE_MAX;
+
+    char buf [256];
+
+    // get a brace expanded representation of query, each expansion
+    // is a null terminated string. the entire buffer is also null
+    // terminated
+    char* const res = rw_brace_expand (query, 0, buf, sizeof (buf), '\0');
+    if (!res)
+        return 0;
+
+    // cache the locale name so we can restore later, this must happen
+    // before _rw_all_locales() because that function just changes the
+    // locale without restoring it. the name of a mixed locale can be
+    // arbitrarily long so the copy is allocated dynamically
+    const char* const cur_locale = setlocale (LC_ALL, 0);
+
+    char* const save_locale =
+        cur_locale ? (char*)malloc (strlen (cur_locale) + 1) : 0;
+
+    if (!save_locale) {
+        if (res != buf)
+            free (res);
+        return 0;
+    }
+
+    strcpy (save_locale, cur_locale);
+
+    const _rw_locale_array all = _rw_all_locales ();
+
+    // make these local and require the user to deallocate
+    // with free?
+    static char*  string   = 0;
+    static size_t capacity = 0;
+
+    // every call starts with an empty list, names found by
+    // previous calls are not carried over
+    size_t length = 0;
+
+    _rw_locale_entry rejects;
+    rejects.canonical_name [0] = '\0';
+    rejects.locale_name    [0] = '\0';
+    rejects.next = all.entries;
+
+    // for each result locale name
+    size_t count = 0;
+    for (const char* name = res; *name; name += strlen (name) + 1)
+    {
+        _rw_locale_entry* dummy = &rejects;
+
+        // linear search for matches in the reject list
+        while (dummy->next)
+        {
+            const _rw_locale_entry* entry = dummy->next;
+
+            // see if we found a match
+            if (rw_fnmatch (name, entry->canonical_name, 0)) {
+
+                // not a match, advance past it leaving it in the
+                // rejects list
+                dummy = dummy->next;
+
+                // and move along to next one
+                continue;
+
+            }
+
+            // remove the accepted entry from the reject list
+            // so we will not include it again
+            dummy->next = entry->next;
+
+            // if the user requested locales from a specific category
+            if (loc_cat != _UNUSED_CAT) {
+
+                // make sure that the matching locale has the specified
+                // locale category and that we can use it.
+                if (!setlocale (loc_cat, entry->locale_name)) {
+
+                    // if we can't use it, then bail. this effectively
+                    // removes the locale from the rejects list and
+                    // doesn't add it to the accepted list.
+                    continue;
+                }
+            }
+
+            const size_t add_length = strlen (entry->locale_name) + 1;
+            const size_t new_length = length + add_length;
+
+            // grow buffer if necessary; new_length must stay strictly
+            // below capacity so that both terminating nulls fit into
+            // the capacity + 1 characters that are allocated
+            if (! (new_length < capacity)) {
+
+                size_t new_capacity = capacity;
+                while (! (new_length < new_capacity))
+                    new_capacity += 256;
+
+                // one additional character for the second null
+                char* new_string =
+                    _RWSTD_STATIC_CAST(char*,
+                                       _QUIET_MALLOC (new_capacity + 1));
+                if (!new_string) {
+
+                    // setup to get out of outer loop
+                    count = wanted;
+
+                    // get out of inner loop
+                    break;
+                }
+
+                // there is nothing to copy into a fresh list
+                if (length)
+                    memcpy (new_string, string, length);
+
+                _QUIET_FREE (string);
+
+                // record the new size only once the buffer exists
+                string   = new_string;
+                capacity = new_capacity;
+            }
+
+            // append the name, and update the length
+            memcpy (string + length, entry->locale_name, add_length);
+
+            length = new_length;
+
+            count += 1;
+            if (! (count < wanted))
+                break;
+        }
+
+        if (! (count < wanted))
+            break;
+    }
+
+    // restore the previous locale
+    setlocale (LC_ALL, save_locale);
+    free (save_locale);
+
+    // deallocate the shell expand buffer if needed
+    if (res != buf)
+        free (res);
+
+    // double null terminated
+    if (string) {
+        string [length+0] = '\0';
+        string [length+1] = '\0';
+    }
+
+    return string;
+}
+
+
+// looks up name among the native names of the table and returns the
+// canonical name stored with it, or 0 when name is null or not found
+const char*
+_rw_lookup_table_t::get_canonical_name (const char* name) const
+{
+    // don't search for null string
+    if (0 == name)
+        return 0;
+
+    // only the native member of the key takes part in the comparison
+    _rw_lookup_entry_t key;
+    key.native    = name;
+    key.canonical = 0;
+
+    const void* const match =
+        bsearch (&key, entries_, count_,
+                 sizeof (_rw_lookup_entry_t), _rw_lookup_comparator);
+
+    return match ? ((const _rw_lookup_entry_t*)match)->canonical : 0;
+}
+
+// fills the table from the mapping file whose name is path followed
+// by name.
+//
+// lines that are empty or start with '#' are ignored. a line that
+// starts with a non-whitespace character begins with a canonical name;
+// the rest of the line, and all of the following lines that start with
+// whitespace, hold a comma separated list of native names that map to
+// that canonical name.
+//
+//   upper_or_lower  negative: the text is converted to lowercase,
+//                   positive: to uppercase, zero: left as is
+//
+// returns true when the file was read and the table is ready, false
+// when the table is already loaded or the file could not be processed.
+bool
+_rw_lookup_table_t::load_from_file (const char* path, const char* name, int upper_or_lower)
+{
+    if (entries_)
+        return false; // should never happen
+
+    char filename [PATH_MAX];
+
+    // make sure the complete name and its terminator fit
+    if (! (strlen (path) + strlen (name) < sizeof (filename))) {
+        rw_error (0, __FILE__, __LINE__,
+                  "the file name %s%s is too long", path, name);
+        return false;
+    }
+
+    strcpy (filename, path);
+    strcat (filename, name);
+
+    FILE* const file = fopen (filename, "rb");
+    if (!file) {
+        rw_error (0, __FILE__, __LINE__,
+                  "failed to open the file %s", filename);
+        return false;
+    }
+
+    // get the size of the file in bytes
+    long file_size = -1;
+    if (0 == fseek (file, 0, SEEK_END))
+        file_size = ftell (file);
+
+    if (file_size < 0 || 0 != fseek (file, 0, SEEK_SET)) {
+        fclose (file);
+        return false;
+    }
+
+    const size_t table_data_size = (size_t)file_size;
+
+    char* const table_data = (char*)malloc (table_data_size + 1);
+    if (!table_data) {
+        fclose (file);
+        return false;
+    }
+
+    // read the entire file into the data buffer, the
+    // file is not needed after that
+    const size_t bytes_read =
+        fread (table_data, 1, table_data_size, file);
+
+    fclose (file);
+
+    if (bytes_read != table_data_size) {
+        free (table_data);
+        return false;
+    }
+
+    // null terminate
+    table_data [bytes_read] = '\0';
+
+    const size_t entry_size = sizeof (_rw_lookup_entry_t);
+
+    _rw_lookup_entry_t* entries = 0;
+    size_t capacity = 0;
+    size_t size     = 0;
+
+    const char* canonical_name = 0;
+
+    for (size_t offset = 0; offset < bytes_read; /**/) {
+
+        char* key = table_data + offset;
+
+        const size_t len = strcspn (key, "\r\n");
+        key [len] = '\0';
+
+        // skip the newline if it is there
+        offset += len + 1;
+
+        // special handling for empty lines and comments
+        if (!*key || *key == '#')
+            continue;
+
+        // make upper or lower case as requested
+        if (upper_or_lower < 0) {
+            for (char* s = key; *s; ++s)
+                *s = _rw_tolower (*s);
+        }
+        else if (0 < upper_or_lower) {
+            for (char* s = key; *s; ++s)
+                *s = _rw_toupper (*s);
+        }
+
+        // if first character of new line is not whitespace, then we
+        // have a new canonical name token
+        if (!_rw_isspace (*key)) {
+
+            canonical_name = key;
+
+            // increment key past canonical name
+            for (/**/; *key; ++key)
+                if (_rw_isspace (*key))
+                    break;
+        }
+
+        // kill whitespace
+        while (_rw_isspace (*key))
+            *key++ = '\0';
+
+        // key points to first non-whitespace after canonical name
+
+        while (*key) {
+
+            // key is first non-whitespace character, which is the
+            // next native name we should record
+            char* const native_name = key;
+
+            // find first comma character, that is the end of the
+            // native name
+            while (*key && *key != ',')
+                ++key;
+
+            // remember where the name ends before the comma is
+            // overwritten
+            char* name_end = key;
+
+            // if we found a comma, setup next name
+            if (*key)
+                *key++ = '\0';
+
+            // kill any whitespace before comma (or the end of line)
+            while (native_name < name_end && _rw_isspace (name_end [-1]))
+                *--name_end = '\0';
+
+            // kill whitespace after comma
+            while (_rw_isspace (*key))
+                *key++ = '\0';
+
+            // nothing to record between two commas
+            if (!*native_name)
+                continue;
+
+            // ensure we have enough entries
+            if (! (size < capacity)) {
+
+                const size_t new_capacity = capacity + 64;
+
+                // on failure realloc() leaves the old array intact
+                void* const new_entries =
+                    realloc (entries, entry_size * new_capacity);
+                if (!new_entries) {
+
+                    free (entries);
+
+                    free (table_data);
+
+                    return false;
+                }
+
+                entries  = (_rw_lookup_entry_t*)new_entries;
+                capacity = new_capacity;
+            }
+
+            // add the new mapping entry
+            _rw_lookup_entry_t* const entry = &entries [size];
+            entry->native = native_name;
+            entry->canonical = canonical_name;
+
+            // increment number of entries
+            size += 1;
+        }
+    }
+
+    // sort the entries by native name for efficient searching
+    if (size)
+        qsort (entries, size, entry_size, _rw_lookup_comparator);
+
+    // setup the table for return
+    entries_  = entries;
+    count_    = size;
+    table_data_ = table_data;
+
+    return true;
+}