You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by jb...@locus.apache.org on 2000/10/09 20:55:59 UTC
cvs commit: xml-xerces/c/src/util/Transcoders/MacOSUnicodeConverter MacOSUnicodeConverter.cpp MacOSUnicodeConverter.hpp
jberry 00/10/09 11:55:59
Modified: c/src/util/Transcoders/MacOSUnicodeConverter
MacOSUnicodeConverter.cpp MacOSUnicodeConverter.hpp
Log:
- Fix Mac OS X support. GCC in this environment sets wchar_t to a 32 bit
value which requires an additional transcoding stage (bleh...)
- Improve sensitivity to environment in order to support a broader
range of system versions.
- Fix a few compiler sensitivities.
Revision Changes Path
1.3 +564 -272 xml-xerces/c/src/util/Transcoders/MacOSUnicodeConverter/MacOSUnicodeConverter.cpp
Index: MacOSUnicodeConverter.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/util/Transcoders/MacOSUnicodeConverter/MacOSUnicodeConverter.cpp,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- MacOSUnicodeConverter.cpp 2000/07/25 22:31:26 1.2
+++ MacOSUnicodeConverter.cpp 2000/10/09 18:55:58 1.3
@@ -56,6 +56,13 @@
/**
* $Log: MacOSUnicodeConverter.cpp,v $
+ * Revision 1.3 2000/10/09 18:55:58 jberry
+ * - Fix Mac OS X support. GCC in this environment sets wchar_t to a 32 bit
+ * value which requires an additional transcoding stage (bleh...)
+ * - Improve sensitivity to environment in order to support a broader
+ * range of system versions.
+ * - Fix a few compiler sensitivities.
+ *
* Revision 1.2 2000/07/25 22:31:26 aruna1
* Char definitions in XMLUni moved to XMLUniDefs
*
@@ -108,21 +115,46 @@
#include <util/XMLUni.hpp>
#include <util/XMLString.hpp>
#include <util/TranscodingException.hpp>
+#include <util/PlatformUtils.hpp>
+#include <util/Janitor.hpp>
+#include <util/Platforms/MacOS/MacOSPlatformUtils.hpp>
#if defined(XML_METROWERKS)
-#include <cwctype>
+ // Only used under metrowerks.
+ // In MacOS X Public Beta, the system wchar.h header is missing, which causes this to fail.
+ #include <cwctype>
#endif
+#include <algorithm>
#include <cstddef>
#include <cstring>
+
+#if TARGET_API_MAC_CARBON
+ #include <Carbon.h>
+#else
+ #include <Errors.h>
+ #include <Script.h>
+ #include <TextUtils.h>
+ #include <TextEncodingConverter.h>
+ #include <TextCommon.h>
+ #include <CodeFragments.h>
+ #include <UnicodeConverter.h>
+ #include <UnicodeUtilities.h>
+#endif
+
+
+// ---------------------------------------------------------------------------
+// Typedefs
+// ---------------------------------------------------------------------------
-#include <Errors.h>
-#include <Script.h>
-#include <TextUtils.h>
-#include <TextEncodingConverter.h>
-#include <TextCommon.h>
-#include <UnicodeConverter.h>
-#include <UnicodeUtilities.h>
+// TempUniBuf is used for cases where we need a temporary conversion due to
+// a mismatch between UniChar (the 16-bit type that the Unicode converter uses)
+// and wchar_t (the type the compiler uses to represent a Unicode character).
+// In the case of Metrowerks, these are the same size. For ProjectBuilder, they
+// differ. TempUniBuf is also used for a few cases where we want to discard the
+// output from the unicode converter.
+const std::size_t kTempUniBufCount = 256;
+typedef UniChar TempUniBuf[kTempUniBufCount];
// ---------------------------------------------------------------------------
@@ -133,6 +165,8 @@
chLatin_M, chLatin_a, chLatin_c, chLatin_O, chLatin_S, chNull
};
+// Detect a mismatch in unicode character size.
+const bool kUniSizeMismatch = sizeof(XMLCh) != sizeof(UniChar);
// ---------------------------------------------------------------------------
@@ -140,8 +174,11 @@
// ---------------------------------------------------------------------------
MacOSUnicodeConverter::MacOSUnicodeConverter()
{
+ // Test for presence of unicode collation functions
+ mHasUnicodeCollation = (UCCompareTextDefault != (void*)kUnresolvedCFragSymbolAddress);
}
+
MacOSUnicodeConverter::~MacOSUnicodeConverter()
{
}
@@ -153,48 +190,114 @@
int MacOSUnicodeConverter::compareIString( const XMLCh* const comp1
, const XMLCh* const comp2)
{
-#if 0
- // We could use this if we have a reasonable c library
- // metrowerks supports all of this.
- // Note however that the c library version of towupper is probably
- // not very good. The Unicode Utilities compare routine is probably
- // more diligent, though surely not as quick.
- const XMLCh* cptr1 = comp1;
- const XMLCh* cptr2 = comp2;
-
- while ( (*cptr1 != 0) && (*cptr2 != 0) )
- {
- wint_t wch1 = std::towupper(*cptr1);
- wint_t wch2 = std::towupper(*cptr2);
- if (wch1 != wch2)
- break;
-
- cptr1++;
- cptr2++;
+ // If unicode collation routines are available, use them.
+ // This should be the case on Mac OS 8.6 and later,
+ // with Carbon 1.0.2 or later, and under Mac OS X.
+ //
+ // Otherwise, but only for Metrowerks, since only Metrowerks
+ // has a c library with a valid set of wchar routines,
+ // fall back to the standard library.
+
+ if (mHasUnicodeCollation)
+ {
+ // Use the Unicode Utilities to do the compare
+
+ // This has gotten more painful with the need to allow
+ // conversion between different sizes of XMLCh and UniChar.
+ // We allocate a static buffer and do multiple passes
+ // to allow for the case where the strings being compared
+ // are larger than the static buffer.
+
+ UCCollateOptions collateOptions =
+ kUCCollateComposeInsensitiveMask
+ | kUCCollateWidthInsensitiveMask
+ | kUCCollateCaseInsensitiveMask
+ | kUCCollatePunctuationSignificantMask
+ ;
+
+ std::size_t srcOffset = 0;
+ std::size_t cnt1 = XMLString::stringLen(comp1);
+ std::size_t cnt2 = XMLString::stringLen(comp2);
+
+ // Do multiple passes over source, comparing each pass.
+ // The first pass that's not equal wins.
+ int result = 0;
+ while (result == 0 && (cnt1 || cnt2))
+ {
+ TempUniBuf buf1;
+ TempUniBuf buf2;
+
+ const UniChar* src1;
+ const UniChar* src2;
+
+ std::size_t passCnt1;
+ std::size_t passCnt2;
+
+ if (kUniSizeMismatch)
+ {
+ passCnt1 = std::min(cnt1, kTempUniBufCount);
+ passCnt2 = std::min(cnt2, kTempUniBufCount);
+
+ src1 = CopyXMLChsToUniChars(comp1 + srcOffset, buf1, passCnt1, kTempUniBufCount);
+ src2 = CopyXMLChsToUniChars(comp2 + srcOffset, buf2, passCnt2, kTempUniBufCount);
+ }
+ else
+ {
+ passCnt1 = cnt1;
+ passCnt2 = cnt2;
+
+ src1 = reinterpret_cast<const UniChar*>(comp1);
+ src2 = reinterpret_cast<const UniChar*>(comp2);
+ }
+
+ // Do the actual compare for this pass
+ Boolean equivalent = false;
+ SInt32 order = 0;
+ OSStatus status = UCCompareTextDefault(
+ collateOptions,
+ src1,
+ passCnt1,
+ src2,
+ passCnt2,
+ &equivalent,
+ &order
+ );
+
+ result = ((status != noErr) || equivalent) ? 0 : order;
+
+ srcOffset += kTempUniBufCount;
+ cnt1 -= passCnt1;
+ cnt2 -= passCnt2;
+ }
+
+ return result;
}
- return (int) ( std::towupper(*cptr1) - std::towupper(*cptr2) );
+#if defined(XML_METROWERKS)
+ else
+ {
+ const XMLCh* cptr1 = comp1;
+ const XMLCh* cptr2 = comp2;
+
+ while ( (*cptr1 != 0) && (*cptr2 != 0) )
+ {
+ std::wint_t wch1 = std::towupper(*cptr1);
+ std::wint_t wch2 = std::towupper(*cptr2);
+ if (wch1 != wch2)
+ break;
+
+ cptr1++;
+ cptr2++;
+ }
+ return (int) (std::towupper(*cptr1) - std::towupper(*cptr2));
+ }
#else
- // Use the Unicode Utilities to do the compare
- UCCollateOptions collateOptions =
- kUCCollateComposeInsensitiveMask
- | kUCCollateWidthInsensitiveMask
- | kUCCollateCaseInsensitiveMask
- | kUCCollatePunctuationSignificantMask
- ;
-
- Boolean equivalent = false;
- SInt32 order = 0;
- OSStatus status = UCCompareTextDefault(
- collateOptions,
- reinterpret_cast<const UniChar* const>(comp1),
- XMLString::stringLen(comp1),
- reinterpret_cast<const UniChar* const>(comp2),
- XMLString::stringLen(comp2),
- &equivalent,
- &order
- );
-
- return ((status != noErr) || equivalent) ? 0 : order;
+ else
+ {
+ // For some reason there is no platform utils available
+ // where we expect it. Bail.
+ XMLPlatformUtils::panic(XMLPlatformUtils::Panic_NoTransService);
+ return 0;
+ }
#endif
}
@@ -203,59 +306,123 @@
, const XMLCh* const comp2
, const unsigned int maxChars)
{
-#if 0
- // We could use this if we have a reasonable c library
- // metrowerks supports all of this.
- // Note however that the c library version of towupper is probably
- // not very good. The Unicode Utilities compare routine is probably
- // more diligent, though surely not as quick.
- unsigned int n = 0;
- const XMLCh* cptr1 = comp1;
- const XMLCh* cptr2 = comp2;
+ // If unicode collation routines are available, use them.
+ // This should be the case on Mac OS 8.6 and later,
+ // with Carbon 1.0.2 or later, and under Mac OS X.
+ //
+ // Otherwise, but only for Metrowerks, since only Metrowerks
+ // has a c library with a valid set of wchar routines,
+ // fall back to the standard library.
- while ( (*cptr1 != 0) && (*cptr2 != 0) && (n < maxChars) )
- {
- wint_t wch1 = towupper(*cptr1);
- wint_t wch2 = towupper(*cptr2);
- if (wch1 != wch2)
- break;
-
- cptr1++;
- cptr2++;
- n++;
- }
- return (int) ( towupper(*cptr1) - towupper(*cptr2) );
+ if (mHasUnicodeCollation)
+ {
+ // Use the Unicode Utilities to do the compare
+
+ // This has gotten more painful with the need to allow
+ // conversion between different sizes of XMLCh and UniChar.
+ // We allocate a static buffer and do multiple passes
+ // to allow for the case where the strings being compared
+ // are larger than the static buffer.
+
+ UCCollateOptions collateOptions =
+ kUCCollateComposeInsensitiveMask
+ | kUCCollateWidthInsensitiveMask
+ | kUCCollateCaseInsensitiveMask
+ | kUCCollatePunctuationSignificantMask
+ ;
+
+ std::size_t srcOffset = 0;
+ std::size_t cnt1 = XMLString::stringLen(comp1);
+ std::size_t cnt2 = XMLString::stringLen(comp2);
+
+ // Restrict view of source characters to first {maxChars}
+ if (cnt1 > maxChars)
+ cnt1 = maxChars;
+
+ if (cnt2 > maxChars)
+ cnt2 = maxChars;
+
+ // Do multiple passes over source, comparing each pass.
+ // The first pass that's not equal wins.
+ int result = 0;
+ while (result == 0 && (cnt1 || cnt2))
+ {
+ TempUniBuf buf1;
+ TempUniBuf buf2;
+
+ const UniChar* src1;
+ const UniChar* src2;
+
+ std::size_t passCnt1;
+ std::size_t passCnt2;
+
+ if (kUniSizeMismatch)
+ {
+ passCnt1 = std::min(cnt1, kTempUniBufCount);
+ passCnt2 = std::min(cnt2, kTempUniBufCount);
+
+ src1 = CopyXMLChsToUniChars(comp1 + srcOffset, buf1, passCnt1, kTempUniBufCount);
+ src2 = CopyXMLChsToUniChars(comp2 + srcOffset, buf2, passCnt2, kTempUniBufCount);
+ }
+ else
+ {
+ passCnt1 = cnt1;
+ passCnt2 = cnt2;
+
+ src1 = reinterpret_cast<const UniChar*>(comp1);
+ src2 = reinterpret_cast<const UniChar*>(comp2);
+ }
+
+ // Do the actual compare for this pass
+ Boolean equivalent = false;
+ SInt32 order = 0;
+ OSStatus status = UCCompareTextDefault(
+ collateOptions,
+ src1,
+ passCnt1,
+ src2,
+ passCnt2,
+ &equivalent,
+ &order
+ );
+
+ result = ((status != noErr) || equivalent) ? 0 : order;
+
+ srcOffset += kTempUniBufCount;
+ cnt1 -= passCnt1;
+ cnt2 -= passCnt2;
+ }
+
+ return result;
+ }
+#if defined(XML_METROWERKS)
+ else
+ {
+ unsigned int n = 0;
+ const XMLCh* cptr1 = comp1;
+ const XMLCh* cptr2 = comp2;
+
+ while ( (*cptr1 != 0) && (*cptr2 != 0) && (n < maxChars) )
+ {
+ std::wint_t wch1 = std::towupper(*cptr1);
+ std::wint_t wch2 = std::towupper(*cptr2);
+ if (wch1 != wch2)
+ break;
+
+ cptr1++;
+ cptr2++;
+ n++;
+ }
+ return (int)(std::towupper(*cptr1) - std::towupper(*cptr2));
+ }
#else
- // Use the Unicode Utilities to do the compare
- UCCollateOptions collateOptions =
- kUCCollateComposeInsensitiveMask
- | kUCCollateWidthInsensitiveMask
- | kUCCollateCaseInsensitiveMask
- | kUCCollatePunctuationSignificantMask
- ;
-
- std::size_t len1 = XMLString::stringLen(comp1);
- std::size_t len2 = XMLString::stringLen(comp2);
-
- if (len1 > maxChars)
- len1 = maxChars;
-
- if (len2 > maxChars)
- len2 = maxChars;
-
- Boolean equivalent = false;
- SInt32 order = 0;
- OSStatus status = UCCompareTextDefault(
- collateOptions,
- reinterpret_cast<const UniChar* const>(comp1),
- len1,
- reinterpret_cast<const UniChar* const>(comp2),
- len2,
- &equivalent,
- &order
- );
-
- return ((status != noErr) || equivalent) ? 0 : order;
+ else
+ {
+ // For some reason there is no platform utils available
+ // where we expect it. Bail.
+ XMLPlatformUtils::panic(XMLPlatformUtils::Panic_NoTransService);
+ return 0;
+ }
#endif
}
@@ -272,6 +439,10 @@
// Use this if there's a reasonable c library available.
// ProjectBuilder currently has no support for iswspace ;(
return (std::iswspace(toCheck) != 0);
+#elif defined(XML_MACOSX) || true
+ // This looks fairly good, assuming we're on an ascii compiler.
+ // We'll use this under ProjectBuilder for now.
+ return (toCheck == L' ');
#elif 0
// This is okay but probably kind of slow for what we need
UCCharPropertyValue propValue = 0;
@@ -282,10 +453,6 @@
kUCCharPropTypeGenlCategory,
&propValue);
return (status == noErr) && (propValue == kUCGenlCatSeparatorSpace);
-#elif defined(XML_MACOSX) || true
- // This looks fairly good, assuming we're on an ascii compiler.
- // We'll use this under ProjectBuilder for now.
- return (toCheck == L' ');
#endif
}
@@ -340,6 +507,7 @@
void MacOSUnicodeConverter::upperCase(XMLCh* const toUpperCase) const
{
+ // ••• TODO: Support CFString for this conversion
#if defined(XML_METROWERKS)
// Use this if there's a reasonable c library available.
// Metrowerks does this reasonably
@@ -348,12 +516,12 @@
while ((c = *p) != 0)
*p++ = std::towupper(c);
-#elif 1
+#elif defined(XML_MACOSX) || true
// This might work, assuming we're on an ascii compiler.
// We'll use this under ProjectBuilder for now.
// Note that this only handles the ascii portion of the
// string, leaving all other characters in original case.
- wchar_t * p = (wchar_t*) toUpperCase;
+ wchar_t * p = (wchar_t*)toUpperCase;
wchar_t c;
while ((c = *p) != 0)
@@ -362,13 +530,6 @@
c += 'A' - 'a';
*p++ = c;
}
-#elif 0
- // This looks like it would work, but doesn't.
- // Apart from CFString, there doesn't appear to be any
- // other likely candidates for this support in the OS.
- // Seems like the Unicode converter should have a routine
- // do this!!!
- UppercaseText(reinterpret_cast<Ptr>(toUpperCase), XMLString::stringLen(toUpperCase), smUnicodeScript);
#else
#error Sorry, no support for upperCase
#endif
@@ -384,7 +545,6 @@
}
-
// ---------------------------------------------------------------------------
// MacOSTransService: The protected virtual transcoding service API
// ---------------------------------------------------------------------------
@@ -404,7 +564,7 @@
Str255 pasEncodingName;
char cEncodingName[256];
- // TODO: Use Transcode instead here!
+ // ••• TODO: Use Transcode instead here!
ConvertWideToNarrow(encodingName, cEncodingName, sizeof(cEncodingName));
CopyCStringToPascal(cEncodingName, pasEncodingName);
@@ -470,7 +630,7 @@
// ---------------------------------------------------------------------------
unsigned int
-MacOSTranscoder::transcodeFrom( const XMLByte* const srcData
+MacOSTranscoder::transcodeFrom( const XMLByte* const srcData
, const unsigned int srcCount
, XMLCh* const toFill
, const unsigned int maxChars
@@ -490,12 +650,12 @@
| kUnicodeLooseMappingsMask
// | kUnicodeStringUnterminatedMask
// | kUnicodeTextRunMask
- , // control flags
- 0, // ioffset count
- NULL, // ioffset array
- 0, // ooffset count
- NULL, // ooffset array
- maxChars * sizeof(UniChar),
+ , // control flags
+ 0, // ioffset count
+ NULL, // ioffset array
+ 0, // ooffset count
+ NULL, // ooffset array
+ maxChars * sizeof(UniChar), // iOutputBufLen (bytes)
&bytesConsumed,
&bytesProduced,
reinterpret_cast<UniCharArrayPtr>(toFill));
@@ -507,9 +667,19 @@
bytesConsumed = 0;
bytesProduced = 0;
}
-
+
+ std::size_t charsProduced = bytesProduced / sizeof(UniChar);
+
+ // If XMLCh is not same length as UniChar (under GCC)
+ // then we need to convert the UniChar characters up to
+ // XMLCh. We lied about the max buffer length above in
+ // order to leave room in our output buffer. So we know
+ // we're in good shape here to just convert in place.
+ if (kUniSizeMismatch)
+ CopyUniCharsToXMLChs(reinterpret_cast<UniChar* const>(toFill), toFill, charsProduced, maxChars);
+
bytesEaten = bytesConsumed;
- return bytesProduced / sizeof(UniChar);
+ return charsProduced;
}
@@ -521,8 +691,11 @@
, unsigned int& charsEaten
, const UnRepOpts options)
{
- ByteCount bytesConsumed = 0;
- ByteCount bytesProduced = 0;
+ ByteCount totalCharsConsumed = 0;
+ ByteCount totalCharsProduced = 0;
+
+ const XMLCh* src = srcData;
+ std::size_t srcCnt = srcCount;
OptionBits controlFlags =
0
@@ -535,26 +708,62 @@
if (options == UnRep_RepChar)
controlFlags |= kUnicodeUseFallbacksMask;
- OSStatus status = ConvertFromUnicodeToText(
- mUnicodeToTextInfo,
- srcCount * sizeof(XMLCh), // src byte count
- reinterpret_cast<const UniChar*>(srcData),
- controlFlags, // control flags
- 0, // ioffset count
- NULL, // ioffset array
- 0, // ooffset count
- NULL, // ooffset array
- maxBytes,
- &bytesConsumed,
- &bytesProduced,
- toFill);
+ // Do multiple passes of conversion, potentially,
+ // in order to handle the case of a character size
+ // mismatch.
+ OSStatus status;
+ for (status = noErr; status == noErr && srcCnt > 0; )
+ {
+ ByteCount bytesConsumed = 0;
+ ByteCount bytesProduced = 0;
+ std::size_t passCnt = 0;
+ const UniChar* passSrc = NULL;
+
+ // Setup source buffer as needed to accommodate a unicode
+ // character size mismatch.
+ TempUniBuf buf;
+ if (kUniSizeMismatch)
+ {
+ passCnt = std::min(srcCnt, kTempUniBufCount);
+ passSrc = CopyXMLChsToUniChars(src, buf, passCnt, kTempUniBufCount);
+ }
+ else
+ {
+ passCnt = srcCnt;
+ passSrc = reinterpret_cast<const UniChar*>(src);
+ }
+
+ status = ConvertFromUnicodeToText(
+ mUnicodeToTextInfo,
+ passCnt * sizeof(UniChar), // src byte count
+ reinterpret_cast<const UniChar*>(passSrc),
+ controlFlags, // control flags
+ 0, // ioffset count
+ NULL, // ioffset array
+ 0, // ooffset count
+ NULL, // ooffset array
+ maxBytes - totalCharsProduced,
+ &bytesConsumed,
+ &bytesProduced,
+ toFill + totalCharsProduced);
+
+ std::size_t charsConsumed = bytesConsumed / sizeof(UniChar);
+
+ src += charsConsumed;
+ srcCnt -= charsConsumed;
+
+ totalCharsConsumed += charsConsumed;
+ totalCharsProduced += bytesProduced;
+
+ controlFlags |= kUnicodeKeepInfoMask;
+ }
if (status != noErr)
{
if (status == kTECUnmappableElementErr && options == UnRep_Throw)
{
XMLCh tmpBuf[16];
- XMLString::binToText((unsigned int)&srcData[bytesConsumed/sizeof(XMLCh)], tmpBuf, 16, 16);
+ XMLString::binToText((unsigned int)&srcData[totalCharsConsumed], tmpBuf, 16, 16);
ThrowXML2
(
TranscodingException
@@ -564,12 +773,12 @@
);
}
- bytesConsumed = 0;
- bytesProduced = 0;
+ totalCharsConsumed = 0;
+ totalCharsProduced = 0;
}
- charsEaten = bytesConsumed / sizeof(XMLCh);
- return bytesProduced;
+ charsEaten = totalCharsConsumed;
+ return totalCharsProduced;
}
@@ -618,14 +827,17 @@
// converting twic. It would be nice if the calling code could do some
// extra buffering to avoid this result.
// ---------------------------------------------------------------------------
-unsigned int MacOSLCPTranscoder::calcRequiredSize(const char* const srcText)
+unsigned int
+MacOSLCPTranscoder::calcRequiredSize(const char* const srcText)
{
if (!srcText)
return 0;
- const char* src = srcText;
- std::size_t totalBytesProduced = 0;
+ std::size_t totalCharsProduced = 0;
+ const char* src = srcText;
+ std::size_t srcCnt = std::strlen(src);
+
OptionBits options =
kUnicodeUseFallbacksMask
// | kUnicodeKeepInfoMask
@@ -640,35 +852,35 @@
{
ByteCount bytesConsumed = 0;
ByteCount bytesProduced = 0;
+
+ TempUniBuf buf;
status = ConvertFromTextToUnicode(
mTextToUnicodeInfo,
- std::strlen(src), // src byte count
+ srcCnt, // src byte count
src,
options, // control flags
0, // ioffset count
NULL, // ioffset array
0, // ooffset count
NULL, // ooffset array
- kTempBufSize,
+ kTempUniBufCount * sizeof(UniChar),
&bytesConsumed,
&bytesProduced,
- (UniChar*)mTempBuf);
+ buf);
+
+ src += bytesConsumed;
+ srcCnt -= bytesConsumed;
+ totalCharsProduced += bytesProduced / sizeof(UniChar);
options |= kUnicodeKeepInfoMask;
-
- if (status == noErr || status == kTECOutputBufferFullStatus || status == kTECPartialCharErr)
- {
- totalBytesProduced += bytesProduced;
- src += bytesConsumed;
- }
}
if (status != noErr && status != kTECPartialCharErr)
- totalBytesProduced = 0;
+ totalCharsProduced = 0;
// Return number of XMLCh characters required (not counting terminating NULL!)
- return totalBytesProduced / sizeof(XMLCh);
+ return totalCharsProduced;
}
@@ -679,13 +891,16 @@
// converting twic. It would be nice if the calling code could do some
// extra buffering to avoid this result.
// ---------------------------------------------------------------------------
-unsigned int MacOSLCPTranscoder::calcRequiredSize(const XMLCh* const srcText)
+unsigned int
+MacOSLCPTranscoder::calcRequiredSize(const XMLCh* const srcText)
{
if (!srcText)
return 0;
+
+ std::size_t totalBytesProduced = 0;
- const XMLCh* src = const_cast<const XMLCh*>(srcText);
- std::size_t totalBytesProduced = 0;
+ const XMLCh* src = srcText;
+ std::size_t srcCnt = XMLString::stringLen(src);
OptionBits options =
kUnicodeUseFallbacksMask
@@ -695,32 +910,53 @@
;
OSStatus status;
- for (status = kTECOutputBufferFullStatus; status == kTECOutputBufferFullStatus; )
+ for (status = noErr; status == noErr && srcCnt > 0; )
{
- ByteCount bytesConsumed = 0;
- ByteCount bytesProduced = 0;
+ ByteCount bytesConsumed = 0;
+ ByteCount bytesProduced = 0;
+ std::size_t passCnt = 0;
+ const UniChar* passSrc = NULL;
+
+ // Setup source buffer as needed to accommodate a unicode
+ // character size mismatch.
+ TempUniBuf iBuf;
+ if (kUniSizeMismatch)
+ {
+ passCnt = std::min(srcCnt, kTempUniBufCount);
+ passSrc = CopyXMLChsToUniChars(src, iBuf, passCnt, kTempUniBufCount);
+ }
+ else
+ {
+ passCnt = srcCnt;
+ passSrc = reinterpret_cast<const UniChar*>(src);
+ }
+
+ char oBuf[kTempUniBufCount];
status = ConvertFromUnicodeToText(
mUnicodeToTextInfo,
- XMLString::stringLen(src) * sizeof(XMLCh), // src byte count
- reinterpret_cast<ConstUniCharArrayPtr>(src),
+ passCnt * sizeof(UniChar), // src byte count
+ passSrc, // source buffer
options, // control flags
0, // ioffset count
NULL, // ioffset array
0, // ooffset count
NULL, // ooffset array
- kTempBufSize,
+ kTempUniBufCount * sizeof(UniChar), // output buffer size in bytes
&bytesConsumed,
&bytesProduced,
- mTempBuf);
+ oBuf);
+
+ std::size_t charsConsumed = bytesConsumed / sizeof(UniChar);
+ src += charsConsumed;
+ srcCnt -= charsConsumed;
+ totalBytesProduced += bytesProduced;
+
+ if (status == kTECOutputBufferFullStatus)
+ status = noErr;
+
options |= kUnicodeKeepInfoMask;
-
- if (status == noErr || status == kTECOutputBufferFullStatus || status == kTECPartialCharErr)
- {
- totalBytesProduced += bytesProduced;
- src = reinterpret_cast<const XMLCh*>(reinterpret_cast<const char*>(src) + bytesConsumed);
- }
}
if (status != noErr && status != kTECPartialCharErr)
@@ -731,15 +967,16 @@
}
-char* MacOSLCPTranscoder::transcode(const XMLCh* const srcText)
+char*
+MacOSLCPTranscoder::transcode(const XMLCh* const srcText)
{
if (!srcText)
return NULL;
- char* result = NULL;
- const UniChar* src = (const UniChar*)srcText;
- std::size_t srcLen = XMLString::stringLen(srcText) * sizeof(XMLCh);
- std::size_t resultLen = 0;
+ ArrayJanitor<char> result(0);
+ const XMLCh* src = srcText;
+ std::size_t srcCnt = XMLString::stringLen(src);
+ std::size_t resultCnt = 0;
OptionBits options =
kUnicodeUseFallbacksMask
@@ -749,73 +986,92 @@
;
OSStatus status;
- for (status = kTECOutputBufferFullStatus; status == kTECOutputBufferFullStatus; )
+ for (status = noErr; status == noErr && srcCnt > 0; )
{
// Convert an (internal) buffer full of text
- ByteCount bytesConsumed = 0;
- ByteCount bytesProduced = 0;
-
+ ByteCount bytesConsumed = 0;
+ ByteCount bytesProduced = 0;
+ std::size_t passCnt = 0;
+ const UniChar* passSrc = NULL;
+
+ // Setup source buffer as needed to accommodate a unicode
+ // character size mismatch.
+ TempUniBuf iBuf;
+ if (kUniSizeMismatch)
+ {
+ passCnt = std::min(srcCnt, kTempUniBufCount);
+ passSrc = CopyXMLChsToUniChars(src, iBuf, passCnt, kTempUniBufCount);
+ }
+ else
+ {
+ passCnt = srcCnt;
+ passSrc = reinterpret_cast<const UniChar*>(src);
+ }
+
+ TempUniBuf oBuf;
+
status = ConvertFromUnicodeToText(
mUnicodeToTextInfo,
- srcLen, // src byte count
- src,
+ passCnt * sizeof(UniChar), // src byte count
+ passSrc, // source buffer
options, // control flags
0, // ioffset count
NULL, // ioffset array
0, // ooffset count
NULL, // ooffset array
- kTempBufSize,
+ kTempUniBufCount * sizeof(UniChar),
&bytesConsumed,
&bytesProduced,
- mTempBuf);
-
- options |= kUnicodeKeepInfoMask;
-
+ oBuf);
+
// Move the data to result buffer, reallocating as needed
- if (status == noErr || status == kTECOutputBufferFullStatus || status == kTECPartialCharErr)
+ if (bytesProduced > 0)
{
// Allocate space for result
- char* newResult = new char[resultLen + bytesProduced + 1];
- if (newResult != NULL)
+ std::size_t newCnt = resultCnt + bytesProduced;
+ ArrayJanitor<char> newResult(new char[newCnt + 1]);
+ if (newResult.get() != NULL)
{
// Incorporate previous result
- if (result != NULL)
- {
- std::memcpy(newResult, result, resultLen);
- delete [] result;
- }
- result = newResult;
+ if (result.get() != NULL)
+ std::memcpy(newResult.get(), result.get(), resultCnt);
+ result.reset(newResult.release());
// Copy in new data
- std::memcpy(result + resultLen, mTempBuf, bytesProduced);
- resultLen += bytesProduced;
- result[resultLen] = '\0';
+ std::memcpy(result.get() + resultCnt, oBuf, bytesProduced);
+ resultCnt = newCnt;
+
+ result[resultCnt] = '\0';
}
-
- src = (const UniChar*)((const char*)src + bytesConsumed);
- srcLen -= bytesConsumed;
}
+
+ std::size_t charsConsumed = bytesConsumed / sizeof(UniChar);
+ src += charsConsumed;
+ srcCnt -= charsConsumed;
+
+ if (status == kTECOutputBufferFullStatus)
+ status = noErr;
+
+ options |= kUnicodeKeepInfoMask;
}
if (status != noErr && status != kTECPartialCharErr)
- {
- delete [] result;
- result = NULL;
- }
+ result.reset();
- return result;
+ return result.release();
}
-XMLCh* MacOSLCPTranscoder::transcode(const char* const srcText)
+XMLCh*
+MacOSLCPTranscoder::transcode(const char* const srcText)
{
if (!srcText)
return NULL;
- XMLCh* result = NULL;
+ ArrayJanitor<XMLCh> result(0);
const char* src = srcText;
- std::size_t resultLen = 0;
- std::size_t srcLen = std::strlen(src);
+ std::size_t srcCnt = std::strlen(src);
+ std::size_t resultCnt = 0;
OptionBits options =
kUnicodeUseFallbacksMask
@@ -827,65 +1083,69 @@
;
OSStatus status;
- for (status = kTECOutputBufferFullStatus; status == kTECOutputBufferFullStatus; )
+ for (status = noErr; status == noErr && srcCnt > 0; )
{
// Convert an (internal) buffer full of text
ByteCount bytesConsumed = 0;
ByteCount bytesProduced = 0;
+
+ TempUniBuf buf;
status = ConvertFromTextToUnicode(
mTextToUnicodeInfo,
- srcLen, // src byte count
+ srcCnt, // src byte count
src,
options, // control flags
0, // ioffset count
NULL, // ioffset array
0, // ooffset count
NULL, // ooffset array
- kTempBufSize,
+ kTempUniBufCount * sizeof(UniChar), // Byte count of destination buffer
&bytesConsumed,
&bytesProduced,
- (UniChar*)mTempBuf);
-
- options |= kUnicodeKeepInfoMask;
+ buf);
+ std::size_t charsProduced = bytesProduced / sizeof(UniChar);
+
// Move the data to result buffer, reallocating as needed
- if (status == noErr || status == kTECOutputBufferFullStatus || status == kTECPartialCharErr)
+ if (charsProduced > 0)
{
// Allocate space for result
- XMLCh* newResult = new XMLCh[(resultLen + bytesProduced) / sizeof(XMLCh) + 1];
- if (newResult != NULL)
+ std::size_t newCnt = resultCnt + charsProduced;
+ ArrayJanitor<XMLCh> newResult(new XMLCh[newCnt + 1]);
+ if (newResult.get() != NULL)
{
// Incorporate previous result
- if (result != NULL)
- {
- std::memcpy(newResult, result, resultLen);
- delete [] result;
- }
- result = newResult;
-
- // Copy in new data
- std::memcpy(result + resultLen, mTempBuf, bytesProduced);
- resultLen += bytesProduced;
- result[resultLen / sizeof(XMLCh)] = 0;
+ if (result.get() != NULL)
+ std::memcpy(newResult.get(), result.get(), resultCnt * sizeof(XMLCh));
+ result.reset(newResult.release());
+
+ // Copy in new data, converting character formats as necessary
+ CopyUniCharsToXMLChs(buf, result.get() + resultCnt, charsProduced, charsProduced);
+ resultCnt = newCnt;
+
+ result[resultCnt] = 0;
}
-
- src += bytesConsumed;
- srcLen -= bytesConsumed;
}
+
+ src += bytesConsumed;
+ srcCnt -= bytesConsumed;
+
+ if (status == kTECOutputBufferFullStatus)
+ status = noErr;
+
+ options |= kUnicodeKeepInfoMask;
}
if (status != noErr && status != kTECPartialCharErr)
- {
- delete [] result;
- result = NULL;
- }
+ result.reset();
- return result;
+ return result.release();
}
-bool MacOSLCPTranscoder::transcode( const char* const toTranscode
+bool
+MacOSLCPTranscoder::transcode( const char* const toTranscode
, XMLCh* const toFill
, const unsigned int maxChars)
{
@@ -906,7 +1166,7 @@
ByteCount bytesConsumed = 0;
ByteCount bytesProduced = 0;
-
+
OptionBits options =
kUnicodeUseFallbacksMask
// | kUnicodeKeepInfoMask
@@ -929,21 +1189,31 @@
&bytesConsumed,
&bytesProduced,
reinterpret_cast<UniChar* const>(toFill));
-
+
+ std::size_t charsProduced = bytesProduced / sizeof(UniChar);
+
+ // If we have a size mismatch, then convert from UniChar to
+ // XMLCh in place within the output buffer.
+ if (kUniSizeMismatch)
+ CopyUniCharsToXMLChs(reinterpret_cast<UniChar* const>(toFill), toFill, charsProduced, maxChars);
+
// Zero terminate the output string
- std::size_t charsProduced = bytesProduced / sizeof(XMLCh);
toFill[charsProduced] = L'\0';
return (status == noErr);
}
- bool MacOSLCPTranscoder::transcode( const XMLCh* const toTranscode
+bool
+MacOSLCPTranscoder::transcode( const XMLCh* const toTranscode
, char* const toFill
, const unsigned int maxChars)
{
// toFill must contain space for maxChars bytes characters + 1 (for terminating NULL).
-
+ const XMLCh* src = toTranscode;
+ std::size_t srcCnt = XMLString::stringLen(src);
+ std::size_t totalCharsProduced = 0;
+
// Watch for a few psycho corner cases
if (!toTranscode || !maxChars)
{
@@ -951,15 +1221,7 @@
return true;
}
- if (!*toTranscode)
- {
- toFill[0] = 0;
- return true;
- }
-
- ByteCount bytesConsumed = 0;
- ByteCount bytesProduced = 0;
-
+ // Set up options for converter
OptionBits options =
kUnicodeUseFallbacksMask
| kUnicodeLooseMappingsMask
@@ -967,23 +1229,57 @@
// | kUnicodeStringUnterminatedMask
;
- OSStatus status = ConvertFromUnicodeToText(
- mUnicodeToTextInfo,
- XMLString::stringLen(toTranscode) * sizeof(XMLCh), // src byte count
- reinterpret_cast<const UniChar*>(toTranscode),
- options, // control flags
- 0, // ioffset count
- NULL, // ioffset array
- 0, // ooffset count
- NULL, // ooffset array
- maxChars,
- &bytesConsumed,
- &bytesProduced,
- toFill);
-
+ // Do multiple passes of conversion, potentially,
+ // in order to handle the case of a character size
+ // mismatch.
+ OSStatus status;
+ for (status = noErr; status == noErr && srcCnt > 0; )
+ {
+ ByteCount bytesConsumed = 0;
+ ByteCount bytesProduced = 0;
+ std::size_t passCnt = 0;
+ const UniChar* passSrc = NULL;
+
+ // Setup source buffer as needed to accommodate a unicode
+ // character size mismatch.
+ TempUniBuf buf;
+ if (kUniSizeMismatch)
+ {
+ passCnt = std::min(srcCnt, kTempUniBufCount);
+ passSrc = CopyXMLChsToUniChars(src, buf, passCnt, kTempUniBufCount);
+ }
+ else
+ {
+ passCnt = srcCnt;
+ passSrc = reinterpret_cast<const UniChar*>(src);
+ }
+
+ status = ConvertFromUnicodeToText(
+ mUnicodeToTextInfo,
+ passCnt * sizeof(UniChar), // src byte count
+ passSrc, // src buffer
+ options, // control flags
+ 0, // ioffset count
+ NULL, // ioffset array
+ 0, // ooffset count
+ NULL, // ooffset array
+ maxChars - totalCharsProduced,
+ &bytesConsumed,
+ &bytesProduced,
+ toFill + totalCharsProduced);
+
+ std::size_t charsConsumed = bytesConsumed / sizeof(UniChar);
+
+ src += charsConsumed;
+ srcCnt -= charsConsumed;
+
+ totalCharsProduced += bytesProduced;
+
+ options |= kUnicodeKeepInfoMask;
+ }
+
// Zero terminate the output string
- toFill[bytesProduced] = '\0';
+ toFill[totalCharsProduced] = '\0';
return (status == noErr);
}
-
1.2 +14 -9 xml-xerces/c/src/util/Transcoders/MacOSUnicodeConverter/MacOSUnicodeConverter.hpp
Index: MacOSUnicodeConverter.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/util/Transcoders/MacOSUnicodeConverter/MacOSUnicodeConverter.hpp,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- MacOSUnicodeConverter.hpp 2000/07/18 18:26:16 1.1
+++ MacOSUnicodeConverter.hpp 2000/10/09 18:55:58 1.2
@@ -59,6 +59,13 @@
/**
* $Log: MacOSUnicodeConverter.hpp,v $
+ * Revision 1.2 2000/10/09 18:55:58 jberry
+ * - Fix Mac OS X support. GCC in this environment sets wchar_t to a 32 bit
+ * value which requires an additional transcoding stage (bleh...)
+ * - Improve sensitivity to environment in order to support a broader
+ * range of system versions.
+ * - Fix a few compiler sensitivities.
+ *
* Revision 1.1 2000/07/18 18:26:16 andyh
* Mac OS update.
* Contributed by James Berry <jb...@criticalpath.com>
@@ -90,9 +97,14 @@
*/
#include <util/TransService.hpp>
-#include <UnicodeConverter.h>
#include <cstddef>
+#if TARGET_API_MAC_CARBON
+ #include <Carbon.h>
+#else
+ #include <UnicodeConverter.h>
+#endif
+
//
// The transcoding service has to provide a couple of required string
// and character operations, but its most important service is the creation
@@ -154,6 +166,8 @@
private :
friend class XMLPlatformUtils;
+ bool mHasUnicodeCollation; // True if unicode collation is available
+
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
@@ -297,12 +311,6 @@
// -----------------------------------------------------------------------
TextToUnicodeInfo mTextToUnicodeInfo;
UnicodeToTextInfo mUnicodeToTextInfo;
-
- enum {
- kTempBufSize = 512
- };
-
- char mTempBuf[kTempBufSize]; // Temp buf used while in calcRequiredSize
-};
+ };
#endif