You are viewing a plain text version of this content. The canonical link for it is here.
Posted to log4cxx-dev@logging.apache.org by ca...@apache.org on 2007/04/04 06:16:25 UTC
svn commit: r525392 - in /logging/log4cxx/trunk:
include/log4cxx/helpers/unicodehelper.h src/charsetdecoder.cpp
src/charsetencoder.cpp src/unicodehelper.cpp
Author: carnold
Date: Tue Apr 3 21:16:21 2007
New Revision: 525392
URL: http://svn.apache.org/viewvc?view=rev&rev=525392
Log:
LOGCXX-178: Link failure if wchar_t cannot be determined
Modified:
logging/log4cxx/trunk/include/log4cxx/helpers/unicodehelper.h
logging/log4cxx/trunk/src/charsetdecoder.cpp
logging/log4cxx/trunk/src/charsetencoder.cpp
logging/log4cxx/trunk/src/unicodehelper.cpp
Modified: logging/log4cxx/trunk/include/log4cxx/helpers/unicodehelper.h
URL: http://svn.apache.org/viewvc/logging/log4cxx/trunk/include/log4cxx/helpers/unicodehelper.h?view=diff&rev=525392&r1=525391&r2=525392
==============================================================================
--- logging/log4cxx/trunk/include/log4cxx/helpers/unicodehelper.h (original)
+++ logging/log4cxx/trunk/include/log4cxx/helpers/unicodehelper.h Tue Apr 3 21:16:21 2007
@@ -71,35 +71,6 @@
*/
static int encodeUTF16LE(unsigned int ch, char* dst);
-
-#if LOG4CXX_HAS_WCHAR_T
- /**
- * Decodes next character from a sequence of wchar_t values.
- * @param src start of character, will be modified to point at next character.
- * @param srcEnd end of sequence.
- * @return scalar value (UCS-4) or 0xFFFF if invalid sequence.
- */
- static unsigned int decodeWide(const wchar_t*& src, const wchar_t* srcEnd);
-
-
- /**
- * Encodes a character to wchar_t.
- * @param ch UCS-4 value.
- * @param dst buffer to receive wchar_t (must be at least 2 wchar_t)
- * @return number of wchar_t needed to represent character
- */
- static int encodeWide(unsigned int ch, wchar_t* str);
-
- /**
- * Determines the number of UTF-8 bytes required to express
- * the wchar_t value.
- * @param ch wchar_t value
- * @return number of bytes required.
- */
- static int lengthUTF8(wchar_t ch);
-
-#endif
-
/**
* Decodes next character from a LogString.
* @param in string from which the character is extracted.
Modified: logging/log4cxx/trunk/src/charsetdecoder.cpp
URL: http://svn.apache.org/viewvc/logging/log4cxx/trunk/src/charsetdecoder.cpp?view=diff&rev=525392&r1=525391&r2=525392
==============================================================================
--- logging/log4cxx/trunk/src/charsetdecoder.cpp (original)
+++ logging/log4cxx/trunk/src/charsetdecoder.cpp Tue Apr 3 21:16:21 2007
@@ -24,6 +24,8 @@
#include <log4cxx/helpers/pool.h>
#include <apr_xlate.h>
#include <log4cxx/private/log4cxx_private.h>
+#include <langinfo.h>
+#include <apr_portable.h>
using namespace log4cxx;
@@ -64,7 +66,19 @@
if (frompage == APR_DEFAULT_CHARSET) {
throw IllegalArgumentException("APR_DEFAULT_CHARSET");
} else if (frompage == APR_LOCALE_CHARSET) {
- throw IllegalArgumentException("APR_LOCALE_CHARSET");
+ Pool subpool;
+ const char* localeEncoding =
+ apr_os_locale_encoding((apr_pool_t*) subpool.getAPRPool());
+ // Solaris likes returning 646 if nl_langinfo has not been called
+ if(localeEncoding != NULL && strcmp("646", localeEncoding) == 0) {
+ stat = apr_xlate_open(&convset,
+ topage,
+ "ASCII",
+ (apr_pool_t*) pool.getAPRPool());
+ }
+ if (stat != APR_SUCCESS) {
+ throw IllegalArgumentException("APR_LOCALE_CHARSET");
+ }
} else {
throw IllegalArgumentException(frompage);
}
@@ -121,7 +135,7 @@
};
#endif
-#if LOG4CXX_HAS_WCHAR_T && !defined(_WIN32_WCE)
+#if LOG4CXX_LOGCHAR_IS_WCHAR && !defined(_WIN32_WCE)
/**
* Converts from the default multi-byte string to
* LogString using mbstowcs.
@@ -137,29 +151,10 @@
}
private:
-#if LOG4CXX_LOGCHAR_IS_WCHAR
inline log4cxx_status_t append(LogString& out, const wchar_t* buf) {
out.append(buf);
return APR_SUCCESS;
}
-#endif
-
-#if LOG4CXX_LOGCHAR_IS_UTF8
- log4cxx_status_t append(LogString& out, const wchar_t* buf) {
- char utf8[8];
- const wchar_t* current = buf;
- const wchar_t* end = wcschr(buf, 0);
- while(current < end) {
- unsigned int sv = UnicodeHelper::decodeWide(current, end);
- if (sv == 0xFFFF) {
- return APR_BADARG;
- }
- int bytes = UnicodeHelper::encodeUTF8(sv, utf8);
- out.append(utf8, bytes);
- }
- return APR_SUCCESS;
- }
-#endif
virtual log4cxx_status_t decode(ByteBuffer& in,
LogString& out) {
@@ -275,7 +270,7 @@
stat = APR_BADARG;
break;
}
- int wchars = UnicodeHelper::encodeWide(sv, buf);
+ int wchars = UnicodeHelper::encode(sv, buf);
out.append(buf, wchars);
}
in.position(src - in.data());
@@ -378,7 +373,7 @@
};
-#if LOG4CXX_LOGCHAR_IS_UTF8 && LOG4CXX_HAS_WCHAR_T
+#if LOG4CXX_LOGCHAR_IS_UTF8 && LOG4CXX_HAS_WCHAR_T && (defined(_WIN32) || defined(__STDC_ISO_10646__))
/**
* Decoder to convert array of wchar_t to UTF-8 bytes.
*
@@ -391,6 +386,22 @@
virtual ~WideToUTF8CharsetDecoder() {
}
+
+#if defined(_WIN32)
+ unsigned int decodeWide(const wchar_t*& src, const wchar_t* srcEnd) { // decodes one UTF-16 character, advancing src past it
+ unsigned int sv = *(src++);
+ if (sv < 0xD800 || sv >= 0xDC00) { // not a high surrogate (0xD800-0xDBFF): value is returned unchanged
+ return sv;
+ }
+ if (src < srcEnd) { // a high surrogate needs a second code unit to complete the pair
+ unsigned short ls = *(src++);
+ unsigned char w = (unsigned char) ((sv >> 6) & 0x0F);
+ return ((w + 1) << 16) + ((sv & 0x3F) << 10) + (ls & 0x3FF);
+ }
+ return 0xFFFF; // sequence ended in the middle of a surrogate pair
+ }
+#endif
+
virtual log4cxx_status_t decode(ByteBuffer& in,
@@ -400,7 +411,11 @@
out.reserve(out.length() + in.remaining()/sizeof(wchar_t));
char utf8[8];
while(src < srcEnd) {
- unsigned int sv = UnicodeHelper::decodeWide(src, srcEnd);
+#if defined(__STDC_ISO_10646__)
+ unsigned int sv = *(src++);
+#else
+ unsigned int sv = decodeWide(src, srcEnd);
+#endif
if (sv == 0xFFFF) {
return APR_BADARG;
}
@@ -439,7 +454,7 @@
return new ISOLatinCharsetDecoder();
#elif LOG4CXX_LOCALE_ENCODING_US_ASCII
return new USASCIICharsetDecoder();
-#elif LOG4CXX_HAS_WCHAR_T
+#elif LOG4CXX_LOGCHAR_IS_WCHAR
return new MbstowcsCharsetDecoder();
#elif APR_HAS_XLATE
return new APRCharsetDecoder(APR_LOCALE_CHARSET);
Modified: logging/log4cxx/trunk/src/charsetencoder.cpp
URL: http://svn.apache.org/viewvc/logging/log4cxx/trunk/src/charsetencoder.cpp?view=diff&rev=525392&r1=525391&r2=525392
==============================================================================
--- logging/log4cxx/trunk/src/charsetencoder.cpp (original)
+++ logging/log4cxx/trunk/src/charsetencoder.cpp Tue Apr 3 21:16:21 2007
@@ -44,9 +44,11 @@
APRCharsetEncoder(const char* topage) {
#if LOG4CXX_LOGCHAR_IS_WCHAR
const char* frompage = "WCHAR_T";
+#define FROMPAGE "WCHAR_T"
#endif
#if LOG4CXX_LOGCHAR_IS_UTF8
const char* frompage = "UTF-8";
+#define FROMPAGE "UTF-8"
#endif
apr_status_t stat = apr_pool_create(&pool, NULL);
if (stat != APR_SUCCESS) {
@@ -60,7 +62,7 @@
if (topage == APR_DEFAULT_CHARSET) {
throw IllegalArgumentException("APR_DEFAULT_CHARSET");
} else if (topage == APR_LOCALE_CHARSET) {
- throw IllegalArgumentException("APR_LOCALE_CHARSET");
+ throw IllegalArgumentException("APRCharsetEncoder(" FROMPAGE ",APR_LOCALE_CHARSET)");
} else {
throw IllegalArgumentException(topage);
}
@@ -106,7 +108,7 @@
};
#endif
-#if LOG4CXX_HAS_WCHAR_T
+#if LOG4CXX_LOGCHAR_IS_WCHAR
/**
* A character encoder implemented using wcstombs.
*/
@@ -119,8 +121,8 @@
/**
* Converts a wchar_t to the default external multibyte encoding.
*/
- log4cxx_status_t encode(const std::wstring& in,
- std::wstring::const_iterator& iter,
+ log4cxx_status_t encode(const LogString& in,
+ LogString::const_iterator& iter,
ByteBuffer& out) {
log4cxx_status_t stat = APR_SUCCESS;
@@ -168,46 +170,6 @@
return stat;
}
-#if LOG4CXX_LOGCHAR_IS_UTF8
- /**
- * Performs encoding by converting UTF-8 LogString into a wstring
- * and delegating to the other encode method.
- */
- virtual log4cxx_status_t encode(const std::string& in,
- std::string::const_iterator& iter,
- ByteBuffer& out) {
- log4cxx_status_t stat = APR_SUCCESS;
- if (iter != in.end()) {
- std::wstring wideIn;
- wideIn.reserve(in.length());
- const char* src = in.data() + (iter - in.begin());
- const char* srcEnd = in.data() + in.length();
- wchar_t tmp[2];
- while(src < srcEnd) {
- unsigned int sv = UnicodeHelper::decodeUTF8(src, srcEnd);
- if (sv == 0xFFFF) {
- iter = in.begin() + (src - in.data());
- return APR_BADARG;
- }
-
- int wcharCount = UnicodeHelper::encodeWide(sv, tmp);
- wideIn.append(tmp, wcharCount);
- }
- std::wstring::const_iterator wideIter(wideIn.begin());
- stat = encode(wideIn, wideIter, out);
- if (wideIter == wideIn.end()) {
- iter = in.end();
- } else {
- for(std::wstring::const_iterator i = wideIn.begin();
- i != wideIter;
- i++) {
- iter += UnicodeHelper::lengthUTF8(*i);
- }
- }
- }
- return stat;
- }
-#endif
private:
@@ -322,6 +284,7 @@
#endif
#if LOG4CXX_LOGCHAR_IS_WCHAR
+#if defined(_WIN32) || defined(__STDC_ISO_10646__)
/**
* Converts a wstring to UTF-8.
*/
@@ -334,16 +297,16 @@
virtual log4cxx_status_t encode(const LogString& in,
LogString::const_iterator& iter,
ByteBuffer& out) {
- log4cxx_status_t stat = APR_SUCCESS;
- if (iter != in.end()) {
+ log4cxx_status_t stat = APR_SUCCESS;
+ if (iter != in.end()) {
const logchar* const srcBase = in.data();
const logchar* const srcEnd = srcBase + in.length();
const logchar* src = in.data() + (iter - in.begin());
while(out.remaining() >= 8 && src < srcEnd) {
- unsigned int sv = UnicodeHelper::decodeWide(src, srcEnd);
+ unsigned int sv = UnicodeHelper::decode(src, srcEnd);
if (sv == 0xFFFF) {
stat = APR_BADARG;
- break;
+ break;
}
int bytes = UnicodeHelper::encodeUTF8(sv, out.data() + out.position());
out.position(out.position() + bytes);
@@ -357,8 +320,10 @@
UTF8CharsetEncoder(const UTF8CharsetEncoder&);
UTF8CharsetEncoder& operator=(const UTF8CharsetEncoder&);
};
+#else
+#error logchar cannot be wchar_t unless _WIN32 or __STDC_ISO_10646__ is defined
+#endif
#endif
-
/**
* Encodes a LogString to UTF16-BE.
@@ -421,38 +386,6 @@
UTF16LECharsetEncoder& operator=(const UTF16LECharsetEncoder&);
};
-#if LOG4CXX_HAS_WCHAR_T
- /**
- * Converts a LogString to an array of wchar_t.
- */
- class WideCharsetEncoder : public CharsetEncoder
- {
- public:
- WideCharsetEncoder() {
- }
-
-
- virtual log4cxx_status_t encode(const LogString& in,
- LogString::const_iterator& iter,
- ByteBuffer& out) {
- log4cxx_status_t stat = APR_SUCCESS;
- while(iter != in.end() && out.remaining() >= 4) {
- unsigned int sv = UnicodeHelper::decode(in, iter);
- if (sv == 0xFFFF) {
- stat = APR_BADARG;
- break;
- }
- int count = UnicodeHelper::encodeWide(sv, (wchar_t*) out.current());
- out.position(out.position() + count * sizeof(wchar_t));
- }
- return stat;
- }
-
- private:
- WideCharsetEncoder(const WideCharsetEncoder&);
- WideCharsetEncoder& operator=(const WideCharsetEncoder&);
- };
-#endif
} // namespace helpers
@@ -487,7 +420,7 @@
return new ISOLatinCharsetEncoder();
#elif LOG4CXX_LOCALE_ENCODING_US_ASCII
return new USASCIICharsetEncoder();
-#elif LOG4CXX_HAS_WCHAR_T
+#elif LOG4CXX_LOGCHAR_IS_WCHAR
return new WcstombsCharsetEncoder();
#elif APR_HAS_XLATE
return new APRCharsetEncoder(APR_LOCALE_CHARSET);
@@ -513,10 +446,12 @@
CharsetEncoder* CharsetEncoder::createWideEncoder() {
#if LOG4CXX_LOGCHAR_IS_WCHAR
return new TrivialCharsetEncoder();
-#endif
-#if LOG4CXX_LOGCHAR_IS_UTF8
+#elif LOG4CXX_LOGCHAR_IS_UTF8 && (defined(_WIN32) || defined(__STDC_ISO_10646__)) // NOTE(review): this branch returns WideCharsetEncoder, whose class definition is deleted elsewhere in this diff - confirm it still compiles
return new WideCharsetEncoder();
+#else
+ return new APRCharsetEncoder("WCHAR_T"); // otherwise hand the "WCHAR_T" target page name to the APR-based encoder
#endif
+
}
Modified: logging/log4cxx/trunk/src/unicodehelper.cpp
URL: http://svn.apache.org/viewvc/logging/log4cxx/trunk/src/unicodehelper.cpp?view=diff&rev=525392&r1=525391&r2=525392
==============================================================================
--- logging/log4cxx/trunk/src/unicodehelper.cpp (original)
+++ logging/log4cxx/trunk/src/unicodehelper.cpp Tue Apr 3 21:16:21 2007
@@ -92,80 +92,6 @@
}
-#if LOG4CXX_HAS_WCHAR_T
-#if defined(_WIN32)
-unsigned int UnicodeHelper::decodeWide(const wchar_t*& src, const wchar_t* srcEnd) {
- unsigned int sv = *(src++);
- if (sv < 0xDC00 || sv >= 0xDC00) {
- return sv;
- }
- if (src < srcEnd) {
- unsigned short ls = *(src++);
- unsigned char w = (unsigned char) ((sv >> 6) & 0x0F);
- return ((w + 1) << 16) + ((sv & 0x3F) << 10) + (ls & 0x3FF);
- }
- return 0xFFFF;
-}
-
-
-int UnicodeHelper::encodeWide(unsigned int ch, wchar_t* dst) {
- if (ch <= 0xFFFF) {
- *dst = (wchar_t) ch;
- return 1;
- }
- unsigned char u = (unsigned char) (ch >> 16);
- unsigned char w = (unsigned char) (u - 1);
- wchar_t hs = (wchar_t) (0xD800 + ((w & 0xF) << 6) + ((ch & 0xFFFF) >> 10));
- wchar_t ls = (wchar_t) (0xDC00 + (ch && 0x3FF));
- dst[0] = hs;
- dst[1] = ls;
- return 2;
-}
-
-int UnicodeHelper::lengthUTF8(wchar_t ch) {
- if (ch <= 0x7F) {
- return 1;
- }
- if(ch <= 0x7FF) {
- return 2;
- }
- //
- // if low surrogate, only add 1 which in combination with
- // three from high surrogate makes 4 total UTF-8 bytes
- if (ch >= 0xDC00 && ch <= 0xDFFF) {
- return 1;
- }
- return 3;
-}
-
-#endif
-
-
-#if defined(__STDC_ISO_10646__)
-int UnicodeHelper::encodeWide(unsigned int ch, wchar_t* dst) {
- *dst = ch;
- return 1;
-}
-
-unsigned int UnicodeHelper::decodeWide(const wchar_t*& src, const wchar_t* /* srcEnd */) {
- return *(src++);
-}
-
-int UnicodeHelper::lengthUTF8(wchar_t ch) {
- if (ch <= 0x7F) {
- return 1;
- }
- if(ch <= 0x7FF) {
- return 2;
- }
- if (ch <= 0xFFFF) {
- return 3;
- }
- return 4;
-}
-
-#endif
-#endif
int UnicodeHelper::encodeUTF8(unsigned int ch, char* dst) {
@@ -240,7 +166,22 @@
unsigned int UnicodeHelper::decode(const LogString& in, LogString::const_iterator& iter) {
const wchar_t* src = in.data() + (iter - in.begin());
const wchar_t* srcEnd = in.data() + in.length();
- unsigned int sv = decodeWide(src, srcEnd);
+#if defined(__STDC_ISO_10646__)
+ unsigned int sv = *(src++); // each wchar_t is taken as one complete scalar value
+#elif defined(_WIN32)
+ unsigned int sv = *(src++); // first UTF-16 code unit
+ if (sv >= 0xD800 && sv < 0xDC00) { // high surrogate: combine with the code unit that follows
+ if (src < srcEnd) {
+ unsigned short ls = *(src++);
+ unsigned char w = (unsigned char) ((sv >> 6) & 0x0F);
+ sv = ((w + 1) << 16) + ((sv & 0x3F) << 10) + (ls & 0x3FF);
+ } else {
+ sv = 0xFFFF; // string ended in the middle of a surrogate pair
+ }
+ }
+#else
+#error logchar cannot be wchar_t unless _WIN32 or __STDC_ISO_10646__ is defined
+#endif
iter = in.begin() + (src - in.data());
return sv;
}
@@ -259,9 +200,28 @@
#if LOG4CXX_LOGCHAR_IS_WCHAR
+#if defined(_WIN32)
int UnicodeHelper::encode(unsigned int sv, logchar* out) {
- return encodeWide(sv, out);
+ if (sv <= 0xFFFF) { // fits in a single code unit
+ *out = (wchar_t) sv;
+ return 1;
+ }
+ unsigned char u = (unsigned char) (sv >> 16);
+ unsigned char w = (unsigned char) (u - 1);
+ wchar_t hs = (wchar_t) (0xD800 + ((w & 0xF) << 6) + ((sv & 0xFFFF) >> 10));
+ wchar_t ls = (wchar_t) (0xDC00 + (sv & 0x3FF)); // bitwise AND keeps the low 10 bits for the low surrogate
+ out[0] = hs;
+ out[1] = ls;
+ return 2; // surrogate pair written
+}
+#elif defined(__STDC_ISO_10646__)
+int UnicodeHelper::encode(unsigned int sv, logchar* out) {
+ *out = sv; // stored as a single logchar; no surrogate splitting in this variant
+ return 1;
}
+#else
+#error logchar cannot be wchar_t unless _WIN32 or __STDC_ISO_10646__ is defined
+#endif
#endif