You are viewing a plain text version of this content. The canonical link for it is here.
Posted to c-dev@xerces.apache.org by "jose (JIRA)" <xe...@xml.apache.org> on 2006/03/01 12:51:41 UTC
[jira] Created: (XERCESC-1571) Problem with acute accent unicode
Latin character
Problem with acute accent unicode Latin character
-------------------------------------------------
Key: XERCESC-1571
URL: http://issues.apache.org/jira/browse/XERCESC-1571
Project: Xerces-C++
Type: Bug
Components: Miscellaneous
Versions: 2.7.0
Environment: uname -a: Linux aries 2.6.15.4-Abysal #1 SMP Wed Feb 22 17:28:51 CET 2006 i686 GNU/Linux
Reporter: jose
Priority: Minor
The small example program below demonstrates the problem:
---------------------------------------------------------------------------------------------------------------------
#include <stdio.h>
#include <locale.h>
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xercesc/dom/DOM.hpp>
#include <xercesc/framework/MemBufInputSource.hpp>
#include <xercesc/parsers/XercesDOMParser.hpp>
#include <xercesc/framework/StdOutFormatTarget.hpp>
// Null-terminated XMLCh string constants used by the example program below.
// Feature string passed to DOMImplementationRegistry::getDOMImplementation() in main().
const XMLCh pCore[] = { 'C', 'o', 'r', 'e', '\0' };
// Name of the single element created in main().
const XMLCh pRoot[] = { 'r', 'o', 'o', 't', '\0' };
// Value passed to DOMDocument::setVersion() in main().
const XMLCh pVers[] = { '1', '.', '0', '\0' };
// Encoding name set on the document (main) and on the serializer (printf_xml).
const XMLCh pEnc [] = { 'I', 'S', 'O', '-', '8', '8', '5', '9', '-', '1', '\0' };
// Feature string passed to getDOMImplementation() in printf_xml().
const XMLCh pLS [] = { 'L', 'S', '\0' };
// A single line feed; not referenced anywhere else in this example.
const XMLCh pLF [] = { '\n', '\0' };
// Lets the code below use the Xerces-C++ names without namespace qualification.
using namespace xercesc;
void printf_xml( const DOMDocument *doc )
{
DOMImplementation *irl = DOMImplementationRegistry::getDOMImplementation( pLS );
DOMWriter *theSerializer = ((DOMImplementationLS *)irl)->createDOMWriter();
theSerializer->setEncoding( pEnc );
XMLFormatTarget *xmlft = new StdOutFormatTarget();
theSerializer->writeNode( xmlft, *doc );
delete xmlft;
delete theSerializer;
}
// Reproduction driver: builds a one-element DOM document whose text content
// comes from a locally transcoded narrow string, then prints it via printf_xml().
// argc and argv are not used. Always returns 0.
int main( int argc, char *argv[] )
{
// Select the process locale before Xerces is initialized; the report is about
// how the narrow string below is converted under this setting.
setlocale( LC_ALL, "es_ES@euro" );
// Initialize Xerces-C++ with a locale string (note: "es_ES", without the "@euro"
// modifier passed to setlocale above).
XMLPlatformUtils::Initialize( "es_ES" );
// Inner scope so every DOM object is released before Terminate() is called.
{
DOMImplementation *irc = DOMImplementationRegistry::getDOMImplementation( pCore );
// All three createDocument() arguments are 0; the root element is created
// and appended separately below.
DOMDocument *doc = irc->createDocument( 0, 0, 0 );
doc->setVersion( pVers );
doc->setEncoding( pEnc );
DOMElement *de = doc->createElement( pRoot );
// The literal contains an ACUTE ACCENT between '4' and '5'; this transcode()
// call is the one the report asks the reader to look at.
// NOTE(review): the bytes of the literal depend on the source file's encoding,
// and the conversion presumably uses the local code page — confirm.
// NOTE(review): 'static' looks unnecessary here since the buffer is released
// a few lines below — confirm.
static XMLCh *pValue = XMLString::transcode( "1.234´56" );
DOMText *dt = doc->createTextNode( pValue );
// Released right after the text node is created.
// NOTE(review): this assumes createTextNode() keeps its own copy — verify.
XMLString::release( &pValue );
de->appendChild( dt );
doc->appendChild( de );
printf_xml( doc );
doc->release();
}
XMLPlatformUtils::Terminate();
return 0;
}
---------------------------------------------------------------------------------------------------------------------
Pay close attention to the XMLString::transcode() call.
The value "1.234´56" is passed using the "ACUTE ACCENT" between characters '4' and '5'.
When the program is run, the result is as follows:
<?xml version="1.0" encoding="ISO-8859-1" standalone="no" ?><root>1.234Ž56</root>
Why does the unicode character Ž appear instead of ´?
Character Ž corresponds to "LATIN CAPITAL LETTER Z WITH CARON"
and not to "ACUTE ACCENT" that must be ´ on unicode.
--
This message is automatically generated by JIRA.
-
If you think it was sent incorrectly contact one of the administrators:
http://issues.apache.org/jira/secure/Administrators.jspa
-
For more information on JIRA, see:
http://www.atlassian.com/software/jira
---------------------------------------------------------------------
To unsubscribe, e-mail: c-dev-unsubscribe@xerces.apache.org
For additional commands, e-mail: c-dev-help@xerces.apache.org
[jira] Resolved: (XERCESC-1571) Problem with acute accent unicode
Latin character
Posted by "Alberto Massari (JIRA)" <xe...@xml.apache.org>.
[ http://issues.apache.org/jira/browse/XERCESC-1571?page=all ]
Alberto Massari resolved XERCESC-1571:
--------------------------------------
Resolution: Invalid
First, I have to say that what you are trying to do is not healthy; trying to force a locale so that you can transcode a string using XMLString::transcode has several drawbacks, and won't always work (the system might not allow es_ES as a locale, for instance); you should instead create your own transcoder for the desired locale and use its transcodeFrom/transcodeTo methods.
Having said this, your code could work if:
- you have executed runConfigure with the option -t IconvGNU
- you replace the setlocale(LC_ALL, "es_ES") with setenv("LC_ALL","es_ES",1)
> Problem with acute accent unicode Latin character
> -------------------------------------------------
>
> Key: XERCESC-1571
> URL: http://issues.apache.org/jira/browse/XERCESC-1571
> Project: Xerces-C++
> Type: Bug
> Components: Miscellaneous
> Versions: 2.7.0
> Environment: uname -a: Linux aries 2.6.15.4-Abysal #1 SMP Wed Feb 22 17:28:51 CET 2006 i686 GNU/Linux
> Reporter: jose
> Priority: Minor
>
> The small example program below demonstrates the problem:
> ---------------------------------------------------------------------------------------------------------------------
> #include <stdio.h>
> #include <locale.h>
> #include <xercesc/util/PlatformUtils.hpp>
> #include <xercesc/util/XMLString.hpp>
> #include <xercesc/dom/DOM.hpp>
> #include <xercesc/framework/MemBufInputSource.hpp>
> #include <xercesc/parsers/XercesDOMParser.hpp>
> #include <xercesc/framework/StdOutFormatTarget.hpp>
> const XMLCh pCore[] = { 'C', 'o', 'r', 'e', '\0' };
> const XMLCh pRoot[] = { 'r', 'o', 'o', 't', '\0' };
> const XMLCh pVers[] = { '1', '.', '0', '\0' };
> const XMLCh pEnc [] = { 'I', 'S', 'O', '-', '8', '8', '5', '9', '-', '1', '\0' };
> const XMLCh pLS [] = { 'L', 'S', '\0' };
> const XMLCh pLF [] = { '\n', '\0' };
> using namespace xercesc;
> void printf_xml( const DOMDocument *doc )
> {
> DOMImplementation *irl = DOMImplementationRegistry::getDOMImplementation( pLS );
> DOMWriter *theSerializer = ((DOMImplementationLS *)irl)->createDOMWriter();
> theSerializer->setEncoding( pEnc );
> XMLFormatTarget *xmlft = new StdOutFormatTarget();
> theSerializer->writeNode( xmlft, *doc );
> delete xmlft;
> delete theSerializer;
> }
> int main( int argc, char *argv[] )
> {
> setlocale( LC_ALL, "es_ES@euro" );
> XMLPlatformUtils::Initialize( "es_ES" );
> {
> DOMImplementation *irc = DOMImplementationRegistry::getDOMImplementation( pCore );
> DOMDocument *doc = irc->createDocument( 0, 0, 0 );
> doc->setVersion( pVers );
> doc->setEncoding( pEnc );
> DOMElement *de = doc->createElement( pRoot );
> static XMLCh *pValue = XMLString::transcode( "1.234´56" );
> DOMText *dt = doc->createTextNode( pValue );
> XMLString::release( &pValue );
> de->appendChild( dt );
> doc->appendChild( de );
> printf_xml( doc );
> doc->release();
> }
> XMLPlatformUtils::Terminate();
> return 0;
> }
> ---------------------------------------------------------------------------------------------------------------------
> Pay close attention to the XMLString::transcode() call.
> The value "1.234´56" is passed using the "ACUTE ACCENT" between characters '4' and '5'.
> When the program is run, the result is as follows:
> <?xml version="1.0" encoding="ISO-8859-1" standalone="no" ?><root>1.234Ž56</root>
> Why does the unicode character Ž appear instead of ´?
> Character Ž corresponds to "LATIN CAPITAL LETTER Z WITH CARON"
> and not to "ACUTE ACCENT" that must be ´ on unicode.
--
This message is automatically generated by JIRA.
-
If you think it was sent incorrectly contact one of the administrators:
http://issues.apache.org/jira/secure/Administrators.jspa
-
For more information on JIRA, see:
http://www.atlassian.com/software/jira
---------------------------------------------------------------------
To unsubscribe, e-mail: c-dev-unsubscribe@xerces.apache.org
For additional commands, e-mail: c-dev-help@xerces.apache.org