You are viewing a plain text version of this content. The canonical link for it is here.
Posted to log4cxx-dev@logging.apache.org by ca...@apache.org on 2007/04/04 06:16:25 UTC

svn commit: r525392 - in /logging/log4cxx/trunk: include/log4cxx/helpers/unicodehelper.h src/charsetdecoder.cpp src/charsetencoder.cpp src/unicodehelper.cpp

Author: carnold
Date: Tue Apr  3 21:16:21 2007
New Revision: 525392

URL: http://svn.apache.org/viewvc?view=rev&rev=525392
Log:
LOGCXX-178: Link failure if wchar_t cannot be determined

Modified:
    logging/log4cxx/trunk/include/log4cxx/helpers/unicodehelper.h
    logging/log4cxx/trunk/src/charsetdecoder.cpp
    logging/log4cxx/trunk/src/charsetencoder.cpp
    logging/log4cxx/trunk/src/unicodehelper.cpp

Modified: logging/log4cxx/trunk/include/log4cxx/helpers/unicodehelper.h
URL: http://svn.apache.org/viewvc/logging/log4cxx/trunk/include/log4cxx/helpers/unicodehelper.h?view=diff&rev=525392&r1=525391&r2=525392
==============================================================================
--- logging/log4cxx/trunk/include/log4cxx/helpers/unicodehelper.h (original)
+++ logging/log4cxx/trunk/include/log4cxx/helpers/unicodehelper.h Tue Apr  3 21:16:21 2007
@@ -71,35 +71,6 @@
                */
               static int encodeUTF16LE(unsigned int ch, char* dst);
 
-
-#if LOG4CXX_HAS_WCHAR_T
-              /**
-               *   Decodes next character from a sequence of wchar_t values.
-               *   @param src start of character, will be modified to point at next character.
-               *   @param srcEnd end of sequence.
-               *   @return scalar value (UCS-4) or 0xFFFF if invalid sequence.
-               */
-              static unsigned int decodeWide(const wchar_t*& src, const wchar_t* srcEnd);
-
-
-              /**
-               *   Encodes a character to wchar_t.
-               *   @param ch UCS-4 value.
-               *   @param dst buffer to receive wchar_t (must be at least 2 wchar_t)
-               *   @return number of wchar_t needed to represent character
-               */
-              static int encodeWide(unsigned int ch, wchar_t* str);
-
-           /**
-            *   Determines the number of UTF-8 bytes required to express
-            *   the wchar_t value.
-            *   @param ch wchar_t value
-            *   @return number of bytes required.
-            */
-              static int lengthUTF8(wchar_t ch);
-
-#endif
-
               /**
                *   Decodes next character from a LogString.
                *   @param in string from which the character is extracted.

Modified: logging/log4cxx/trunk/src/charsetdecoder.cpp
URL: http://svn.apache.org/viewvc/logging/log4cxx/trunk/src/charsetdecoder.cpp?view=diff&rev=525392&r1=525391&r2=525392
==============================================================================
--- logging/log4cxx/trunk/src/charsetdecoder.cpp (original)
+++ logging/log4cxx/trunk/src/charsetdecoder.cpp Tue Apr  3 21:16:21 2007
@@ -24,6 +24,8 @@
 #include <log4cxx/helpers/pool.h>
 #include <apr_xlate.h>
 #include <log4cxx/private/log4cxx_private.h>
+#include <langinfo.h>
+#include <apr_portable.h>
 
 
 using namespace log4cxx;
@@ -64,7 +66,19 @@
                      if (frompage == APR_DEFAULT_CHARSET) {
                          throw IllegalArgumentException("APR_DEFAULT_CHARSET");
                      } else if (frompage == APR_LOCALE_CHARSET) {
-                         throw IllegalArgumentException("APR_LOCALE_CHARSET");
+                         Pool subpool;
+                         const char* localeEncoding = 
+                            apr_os_locale_encoding((apr_pool_t*) subpool.getAPRPool());
+                         // Solaris likes returning 646 if nl_langinfo has not been called
+                         if(localeEncoding != NULL && strcmp("646", localeEncoding) == 0) {
+                             stat = apr_xlate_open(&convset,
+                                 topage, 
+                                 "ASCII",
+                                 (apr_pool_t*) pool.getAPRPool());
+                         }
+                         if (stat != APR_SUCCESS) {
+                            throw IllegalArgumentException("APR_LOCALE_CHARSET");
+                         } 
                      } else {
                          throw IllegalArgumentException(frompage);
                      }
@@ -121,7 +135,7 @@
           };
 #endif
 
-#if LOG4CXX_HAS_WCHAR_T && !defined(_WIN32_WCE)
+#if LOG4CXX_LOGCHAR_IS_WCHAR && !defined(_WIN32_WCE)
           /**
           *    Converts from the default multi-byte string to
           *        LogString using mbstowcs.
@@ -137,29 +151,10 @@
               }
 
           private:
-#if LOG4CXX_LOGCHAR_IS_WCHAR
               inline log4cxx_status_t append(LogString& out, const wchar_t* buf) {
                   out.append(buf);
                   return APR_SUCCESS;
               }
-#endif
-
-#if LOG4CXX_LOGCHAR_IS_UTF8
-              log4cxx_status_t append(LogString& out, const wchar_t* buf) {
-                  char utf8[8];
-                  const wchar_t* current = buf;
-                  const wchar_t* end = wcschr(buf, 0);
-                  while(current < end) {
-                      unsigned int sv = UnicodeHelper::decodeWide(current, end);
-                      if (sv == 0xFFFF) {
-                          return APR_BADARG;
-                      }
-                      int bytes = UnicodeHelper::encodeUTF8(sv, utf8);
-                      out.append(utf8, bytes);
-                  }
-                  return APR_SUCCESS;
-              }
-#endif
 
               virtual log4cxx_status_t decode(ByteBuffer& in,
                   LogString& out) {
@@ -275,7 +270,7 @@
                 stat = APR_BADARG;
                 break;
              }
-             int wchars = UnicodeHelper::encodeWide(sv, buf);
+             int wchars = UnicodeHelper::encode(sv, buf);
              out.append(buf, wchars);
           }
           in.position(src - in.data());
@@ -378,7 +373,7 @@
 };
 
 
-#if LOG4CXX_LOGCHAR_IS_UTF8 && LOG4CXX_HAS_WCHAR_T
+#if LOG4CXX_LOGCHAR_IS_UTF8 && LOG4CXX_HAS_WCHAR_T && (defined(_WIN32) || defined(__STDC_ISO_10646__))
           /**
           *    Decoder to convert array of wchar_t to UTF-8 bytes.
           *
@@ -391,6 +386,22 @@
 
               virtual ~WideToUTF8CharsetDecoder() {
               }
+              
+#if defined(_WIN32)
+			  unsigned int decodeWide(const wchar_t*& src, const wchar_t* srcEnd) { // decodes one UTF-16 scalar value and advances src past the units consumed
+    			unsigned int sv = *(src++);
+    			if (sv < 0xD800 || sv >= 0xDC00) { // not a high surrogate (0xD800-0xDBFF): the unit is returned unchanged
+        			return sv;
+    			}
+    			if (src < srcEnd) { // a high surrogate needs a following unit; that unit is not checked to be a low surrogate
+        			unsigned short ls = *(src++);
+        			unsigned char w = (unsigned char) ((sv >> 6) & 0x0F); // four plane bits, stored as (plane - 1)
+        			return ((w + 1) << 16) + ((sv & 0x3F) << 10) + (ls & 0x3FF);
+    			}
+    			return 0xFFFF; // truncated pair: the high surrogate was the last unit before srcEnd
+			  }
+#endif
+
 
 
               virtual log4cxx_status_t decode(ByteBuffer& in,
@@ -400,7 +411,11 @@
                   out.reserve(out.length() + in.remaining()/sizeof(wchar_t));
                   char utf8[8];
                   while(src < srcEnd) {
-                      unsigned int sv = UnicodeHelper::decodeWide(src, srcEnd);
+#if defined(__STDC_ISO_10646__)                  
+                      unsigned int sv = *(src++);
+#else
+    				  unsigned int sv = decodeWide(src, srcEnd);
+#endif    				  
                       if (sv == 0xFFFF) {
                           return APR_BADARG;
                       }
@@ -439,7 +454,7 @@
      return new ISOLatinCharsetDecoder();
 #elif LOG4CXX_LOCALE_ENCODING_US_ASCII
      return new USASCIICharsetDecoder();
-#elif LOG4CXX_HAS_WCHAR_T
+#elif LOG4CXX_LOGCHAR_IS_WCHAR
     return new MbstowcsCharsetDecoder();
 #elif APR_HAS_XLATE
     return new APRCharsetDecoder(APR_LOCALE_CHARSET);

Modified: logging/log4cxx/trunk/src/charsetencoder.cpp
URL: http://svn.apache.org/viewvc/logging/log4cxx/trunk/src/charsetencoder.cpp?view=diff&rev=525392&r1=525391&r2=525392
==============================================================================
--- logging/log4cxx/trunk/src/charsetencoder.cpp (original)
+++ logging/log4cxx/trunk/src/charsetencoder.cpp Tue Apr  3 21:16:21 2007
@@ -44,9 +44,11 @@
               APRCharsetEncoder(const char* topage) {
 #if LOG4CXX_LOGCHAR_IS_WCHAR
                   const char* frompage = "WCHAR_T";
+#define FROMPAGE "WCHAR_T"
 #endif
 #if LOG4CXX_LOGCHAR_IS_UTF8
                   const char* frompage = "UTF-8";
+#define FROMPAGE "UTF-8"
 #endif
                   apr_status_t stat = apr_pool_create(&pool, NULL);
                   if (stat != APR_SUCCESS) {
@@ -60,7 +62,7 @@
                      if (topage == APR_DEFAULT_CHARSET) {
                          throw IllegalArgumentException("APR_DEFAULT_CHARSET");
                      } else if (topage == APR_LOCALE_CHARSET) {
-                         throw IllegalArgumentException("APR_LOCALE_CHARSET");
+                         throw IllegalArgumentException("APRCharsetEncoder(" FROMPAGE ",APR_LOCALE_CHARSET)");
                      } else {
                          throw IllegalArgumentException(topage);
                      }
@@ -106,7 +108,7 @@
           };
 #endif
 
-#if LOG4CXX_HAS_WCHAR_T
+#if LOG4CXX_LOGCHAR_IS_WCHAR
           /**
            *  A character encoder implemented using wcstombs.
           */
@@ -119,8 +121,8 @@
            /**
             *   Converts a wchar_t to the default external multibyte encoding.
             */
-              log4cxx_status_t encode(const std::wstring& in,
-                  std::wstring::const_iterator& iter,
+              log4cxx_status_t encode(const LogString& in,
+                    LogString::const_iterator& iter,
                     ByteBuffer& out) {
                       log4cxx_status_t stat = APR_SUCCESS;
 
@@ -168,46 +170,6 @@
                       return stat;
               }
 
-#if LOG4CXX_LOGCHAR_IS_UTF8
-           /**
-            *    Performs encoding by converting UTF-8 LogString into a wstring
-            *      and delegating to the other encode method.
-            */
-              virtual log4cxx_status_t encode(const std::string& in,
-                  std::string::const_iterator& iter,
-                  ByteBuffer& out) {
-                  log4cxx_status_t stat = APR_SUCCESS;
-                  if (iter != in.end()) {
-                    std::wstring wideIn;
-                    wideIn.reserve(in.length());
-                    const char* src = in.data() + (iter - in.begin());
-                    const char* srcEnd = in.data() + in.length();
-                    wchar_t tmp[2];
-                    while(src < srcEnd) {
-                        unsigned int sv = UnicodeHelper::decodeUTF8(src, srcEnd);
-                        if (sv == 0xFFFF) {
-                            iter = in.begin() + (src - in.data());
-                            return APR_BADARG;
-                        }
-
-                        int wcharCount = UnicodeHelper::encodeWide(sv, tmp);
-                        wideIn.append(tmp, wcharCount);
-                    }
-                    std::wstring::const_iterator wideIter(wideIn.begin());
-                    stat = encode(wideIn, wideIter, out);
-                    if (wideIter == wideIn.end()) {
-                        iter = in.end();
-                    } else {
-                        for(std::wstring::const_iterator i = wideIn.begin();
-                            i != wideIter;
-                            i++) {
-                            iter += UnicodeHelper::lengthUTF8(*i);
-                        }
-                    }
-                  }
-                  return stat;
-              }
-#endif
 
 
           private:
@@ -322,6 +284,7 @@
 #endif
 
 #if LOG4CXX_LOGCHAR_IS_WCHAR
+#if defined(_WIN32) || defined(__STDC_ISO_10646__)
           /**
          *  Converts a wstring to UTF-8.
           */
@@ -334,16 +297,16 @@
               virtual log4cxx_status_t encode(const LogString& in,
                     LogString::const_iterator& iter,
                     ByteBuffer& out) {
-              log4cxx_status_t stat = APR_SUCCESS;
-                  if (iter != in.end()) {
+                    log4cxx_status_t stat = APR_SUCCESS;
+                    if (iter != in.end()) {
                       const logchar* const srcBase = in.data();
                       const logchar* const srcEnd = srcBase + in.length();
                       const logchar* src = in.data() + (iter - in.begin());
                       while(out.remaining() >= 8 && src < srcEnd) {
-                           unsigned int sv = UnicodeHelper::decodeWide(src, srcEnd);
+                           unsigned int sv = UnicodeHelper::decode(src, srcEnd);
                            if (sv == 0xFFFF) {
                                stat = APR_BADARG;
-                        break;
+                               break;
                            }
                            int bytes = UnicodeHelper::encodeUTF8(sv, out.data() + out.position());
                            out.position(out.position() + bytes);
@@ -357,8 +320,10 @@
                   UTF8CharsetEncoder(const UTF8CharsetEncoder&);
                   UTF8CharsetEncoder& operator=(const UTF8CharsetEncoder&);
           };
+#else
+#error logchar cannot be wchar_t unless _WIN32 or __STDC_ISO_10646___ is defined          
+#endif
 #endif
-
 
           /**
           *   Encodes a LogString to UTF16-BE.
@@ -421,38 +386,6 @@
                   UTF16LECharsetEncoder& operator=(const UTF16LECharsetEncoder&);
           };
 
-#if LOG4CXX_HAS_WCHAR_T
-          /**
-          *   Converts a LogString to an array of wchar_t.
-          */
-          class WideCharsetEncoder : public CharsetEncoder
-          {
-          public:
-              WideCharsetEncoder() {
-              }
-
-
-              virtual log4cxx_status_t encode(const LogString& in,
-                    LogString::const_iterator& iter,
-                    ByteBuffer& out) {
-                  log4cxx_status_t stat = APR_SUCCESS;
-                  while(iter != in.end() && out.remaining() >= 4) {
-                      unsigned int sv = UnicodeHelper::decode(in, iter);
-                      if (sv == 0xFFFF) {
-                          stat = APR_BADARG;
-                          break;
-                      }
-                      int count = UnicodeHelper::encodeWide(sv, (wchar_t*) out.current());
-                      out.position(out.position() + count * sizeof(wchar_t));
-                  }
-                  return stat;
-              }
-
-          private:
-                  WideCharsetEncoder(const WideCharsetEncoder&);
-                  WideCharsetEncoder& operator=(const WideCharsetEncoder&);
-          };
-#endif
 
 
         } // namespace helpers
@@ -487,7 +420,7 @@
    return new ISOLatinCharsetEncoder();
 #elif LOG4CXX_LOCALE_ENCODING_US_ASCII
    return new USASCIICharsetEncoder();
-#elif LOG4CXX_HAS_WCHAR_T
+#elif LOG4CXX_LOGCHAR_IS_WCHAR
   return new WcstombsCharsetEncoder();
 #elif APR_HAS_XLATE
   return new APRCharsetEncoder(APR_LOCALE_CHARSET);
@@ -513,10 +446,12 @@
 CharsetEncoder* CharsetEncoder::createWideEncoder() {
 #if LOG4CXX_LOGCHAR_IS_WCHAR
   return new TrivialCharsetEncoder();
-#endif
-#if LOG4CXX_LOGCHAR_IS_UTF8
+#elif LOG4CXX_LOGCHAR_IS_UTF8 && (defined(_WIN32) || defined(__STDC_ISO_10646__)) // NOTE(review): this revision deletes the WideCharsetEncoder class from this file; confirm the branch below still builds
   return new WideCharsetEncoder();
+#else
+  return new APRCharsetEncoder("WCHAR_T"); // reached when neither branch above applies; encodes through APRCharsetEncoder with the "WCHAR_T" page
 #endif
+
 }
 
 

Modified: logging/log4cxx/trunk/src/unicodehelper.cpp
URL: http://svn.apache.org/viewvc/logging/log4cxx/trunk/src/unicodehelper.cpp?view=diff&rev=525392&r1=525391&r2=525392
==============================================================================
--- logging/log4cxx/trunk/src/unicodehelper.cpp (original)
+++ logging/log4cxx/trunk/src/unicodehelper.cpp Tue Apr  3 21:16:21 2007
@@ -92,80 +92,6 @@
 }
 
 
-#if LOG4CXX_HAS_WCHAR_T
-#if defined(_WIN32)
-unsigned int UnicodeHelper::decodeWide(const wchar_t*& src, const wchar_t* srcEnd) {
-    unsigned int sv = *(src++);
-    if (sv < 0xDC00 || sv >= 0xDC00) {
-        return sv;
-    }
-    if (src < srcEnd) {
-        unsigned short ls = *(src++);
-        unsigned char w = (unsigned char) ((sv >> 6) & 0x0F);
-        return ((w + 1) << 16) + ((sv & 0x3F) << 10) + (ls & 0x3FF);
-    }
-    return 0xFFFF;
-}
-
-
-int UnicodeHelper::encodeWide(unsigned int ch, wchar_t* dst) {
-  if (ch <= 0xFFFF) {
-      *dst = (wchar_t) ch;
-      return 1;
-  }
-  unsigned char u = (unsigned char) (ch >> 16);
-  unsigned char w = (unsigned char) (u - 1);
-  wchar_t hs = (wchar_t) (0xD800 + ((w & 0xF) << 6) + ((ch & 0xFFFF) >> 10));
-  wchar_t ls = (wchar_t) (0xDC00 + (ch && 0x3FF));
-  dst[0] = hs;
-  dst[1] = ls;
-  return 2;
-}
-
-int UnicodeHelper::lengthUTF8(wchar_t ch) {
-  if (ch <= 0x7F) {
-      return 1;
-  }
-  if(ch <= 0x7FF) {
-      return 2;
-  }
-  //
-  //   if low surrogate, only add 1 which in combination with
-  //   three from high surrogate makes 4 total UTF-8 bytes
-  if (ch >= 0xDC00 && ch <= 0xDFFF) {
-      return 1;
-  }
-  return 3;
-}
-
-#endif
-
-
-#if defined(__STDC_ISO_10646__)
-int UnicodeHelper::encodeWide(unsigned int ch, wchar_t* dst) {
-   *dst = ch;
-   return 1;
-}
-
-unsigned int UnicodeHelper::decodeWide(const wchar_t*& src, const wchar_t* /* srcEnd */) {
-    return *(src++);
-}
-
-int UnicodeHelper::lengthUTF8(wchar_t ch) {
-  if (ch <= 0x7F) {
-      return 1;
-  }
-  if(ch <= 0x7FF) {
-      return 2;
-  }
-  if (ch <= 0xFFFF) {
-      return 3;
-  }
-  return 4;
-}
-
-#endif
-#endif
 
 
 int UnicodeHelper::encodeUTF8(unsigned int ch, char* dst) {
@@ -240,7 +166,22 @@
 unsigned int UnicodeHelper::decode(const LogString& in, LogString::const_iterator& iter) {
     const wchar_t* src = in.data() + (iter - in.begin());
     const wchar_t* srcEnd = in.data() + in.length();
-    unsigned int sv = decodeWide(src, srcEnd);
+#if defined(__STDC_ISO_10646__)
+    unsigned int sv = *(src++); // each wchar_t is taken as one complete scalar value
+#elif defined(_WIN32)
+    unsigned int sv = *(src++); // first UTF-16 unit
+    if (sv >= 0xD800 && sv < 0xDC00) { // high surrogate (0xD800-0xDBFF): combine with the following unit
+    	if (src < srcEnd) {
+        	unsigned short ls = *(src++); // second unit; not checked to be a low surrogate
+        	unsigned char w = (unsigned char) ((sv >> 6) & 0x0F); // four plane bits, stored as (plane - 1)
+        	sv = ((w + 1) << 16) + ((sv & 0x3F) << 10) + (ls & 0x3FF);
+    	} else {
+            sv = 0xFFFF; // truncated pair: the high surrogate was the last unit of the string
+        }
+    }
+#else
+#error logchar cannot be wchar_t unless _WIN32 or __STDC_ISO_10646__ is defined
+#endif
     iter = in.begin() + (src - in.data());
     return sv;
 }
@@ -259,9 +200,28 @@
 
 
 #if LOG4CXX_LOGCHAR_IS_WCHAR
+#if defined(_WIN32)
 int UnicodeHelper::encode(unsigned int sv, logchar* out) {
-    return encodeWide(sv, out);
+  if (sv <= 0xFFFF) { // value fits in a single UTF-16 unit
+      *out = (wchar_t) sv;
+      return 1;
+  }
+  unsigned char u = (unsigned char) (sv >> 16); // plane number of the scalar value
+  unsigned char w = (unsigned char) (u - 1); // surrogates store (plane - 1) in four bits
+  wchar_t hs = (wchar_t) (0xD800 + ((w & 0xF) << 6) + ((sv & 0xFFFF) >> 10)); // high surrogate
+  wchar_t ls = (wchar_t) (0xDC00 + (sv & 0x3FF)); // low surrogate: bitwise mask keeps the low 10 bits
+  out[0] = hs;
+  out[1] = ls;
+  return 2; // number of logchar units written
+}
+#elif defined(__STDC_ISO_10646__)
+int UnicodeHelper::encode(unsigned int sv, logchar* out) { // __STDC_ISO_10646__ build: one logchar holds the whole scalar value
+   *out = (logchar) sv; // write through the declared parameters (sv/out)
+   return 1; // always exactly one unit written
 }
+#else
+#error logchar cannot be wchar_t unless _WIN32 or __STDC_ISO_10646__ is defined
+#endif
 #endif