You are viewing a plain text version of this content. The canonical link for it is here.
Posted to c-dev@xerces.apache.org by "jose (JIRA)" <xe...@xml.apache.org> on 2006/03/01 12:51:41 UTC

[jira] Created: (XERCESC-1571) Problem with acute accent unicode Latin character

Problem with acute accent unicode Latin character
-------------------------------------------------

         Key: XERCESC-1571
         URL: http://issues.apache.org/jira/browse/XERCESC-1571
     Project: Xerces-C++
        Type: Bug
  Components: Miscellaneous  
    Versions: 2.7.0    
 Environment: uname -a: Linux aries 2.6.15.4-Abysal #1 SMP Wed Feb 22 17:28:51 CET 2006 i686 GNU/Linux 
    Reporter: jose
    Priority: Minor


The small example program below demonstrates the problem: 
--------------------------------------------------------------------------------------------------------------------- 
#include <stdio.h> 
#include <locale.h> 
#include <xercesc/util/PlatformUtils.hpp> 
#include <xercesc/util/XMLString.hpp> 
#include <xercesc/dom/DOM.hpp> 
#include <xercesc/framework/MemBufInputSource.hpp> 
#include <xercesc/parsers/XercesDOMParser.hpp> 
#include <xercesc/framework/StdOutFormatTarget.hpp> 

// Null-terminated XMLCh string constants, built element by element from
// character literals rather than through XMLString::transcode().

// Feature name passed to DOMImplementationRegistry::getDOMImplementation() in main().
const XMLCh pCore[] = { 'C', 'o', 'r', 'e', '\0' }; 
// Tag name of the single element created in main().
const XMLCh pRoot[] = { 'r', 'o', 'o', 't', '\0' }; 
// Value handed to DOMDocument::setVersion() in main().
const XMLCh pVers[] = { '1', '.', '0', '\0' }; 
// Encoding name set on both the document (main) and the serializer (printf_xml).
const XMLCh pEnc [] = { 'I', 'S', 'O', '-', '8', '8', '5', '9', '-', '1', '\0' }; 
// Feature name used in printf_xml() to look up the implementation that creates the DOMWriter.
const XMLCh pLS [] = { 'L', 'S', '\0' }; 
// A single line feed; not referenced anywhere in the program shown here.
const XMLCh pLF [] = { '\n', '\0' }; 

using namespace xercesc; 

void printf_xml( const DOMDocument *doc ) 
{ 
   DOMImplementation *irl = DOMImplementationRegistry::getDOMImplementation( pLS ); 
   DOMWriter *theSerializer = ((DOMImplementationLS *)irl)->createDOMWriter(); 
   theSerializer->setEncoding( pEnc ); 
   XMLFormatTarget *xmlft = new StdOutFormatTarget(); 
   theSerializer->writeNode( xmlft, *doc ); 
   delete xmlft; 
   delete theSerializer; 
} 

// Reproduction driver: builds a one-element document whose text content is
// the transcoded literal "1.234´56", prints it via printf_xml(), and shuts
// Xerces down again. argc and argv are not used.
int main( int argc, char *argv[] ) 
{ 
   // Set the C library locale for all categories before Xerces is initialised.
   setlocale( LC_ALL, "es_ES@euro" ); 

   // NOTE(review): "es_ES" is presumably the Xerces message locale; whether it
   // influences XMLString::transcode() is not visible here -- confirm in the
   // PlatformUtils documentation.
   XMLPlatformUtils::Initialize( "es_ES" ); 
   { 
      // Inner scope: all Xerces objects are created and released between
      // Initialize() and Terminate().

      DOMImplementation *irc = DOMImplementationRegistry::getDOMImplementation( pCore ); 

      // All three arguments are null, so no root element is created here;
      // the root is appended explicitly below.
      DOMDocument *doc = irc->createDocument( 0, 0, 0 ); 

      doc->setVersion( pVers ); 
      doc->setEncoding( pEnc ); 

      DOMElement *de = doc->createElement( pRoot ); 

      // The call the report is about: the narrow string literal (containing
      // the acute accent between '4' and '5') is converted to XMLCh here.
      // NOTE(review): 'static' has no visible purpose for this local.
      static XMLCh *pValue = XMLString::transcode( "1.234´56" ); 
      DOMText *dt = doc->createTextNode( pValue ); 
      // Frees the transcoded buffer right after the text node is created;
      // assumes createTextNode() took its own copy -- TODO confirm.
      XMLString::release( &pValue ); 

      de->appendChild( dt ); 

      doc->appendChild( de ); 

      // Serialize the finished document to stdout.
      printf_xml( doc ); 

      doc->release(); 

   } 
   XMLPlatformUtils::Terminate(); 
   return 0; 
} 
--------------------------------------------------------------------------------------------------------------------- 
Pay close attention to the XMLString::transcode() call. 
The value "1.234´56" is passed using the "ACUTE ACCENT" between characters '4' and '5'. 
When the program is run, the result is as follows: 
<?xml version="1.0" encoding="ISO-8859-1" standalone="no" ?><root>1.234&#x17D;56</root> 
Why does the Unicode character &#x17D; appear instead of &#180;? 
Character &#x17D; corresponds to "LATIN CAPITAL LETTER Z WITH CARON", 
not to "ACUTE ACCENT", which should be &#180; (U+00B4) in Unicode.


-- 
This message is automatically generated by JIRA.
-
If you think it was sent incorrectly contact one of the administrators:
   http://issues.apache.org/jira/secure/Administrators.jspa
-
For more information on JIRA, see:
   http://www.atlassian.com/software/jira


---------------------------------------------------------------------
To unsubscribe, e-mail: c-dev-unsubscribe@xerces.apache.org
For additional commands, e-mail: c-dev-help@xerces.apache.org


[jira] Resolved: (XERCESC-1571) Problem with acute accent unicode Latin character

Posted by "Alberto Massari (JIRA)" <xe...@xml.apache.org>.
     [ http://issues.apache.org/jira/browse/XERCESC-1571?page=all ]
     
Alberto Massari resolved XERCESC-1571:
--------------------------------------

    Resolution: Invalid

First, I have to say that what you are trying to do is not healthy; trying to force a locale so that you can transcode a string using XMLString::transcode has several drawbacks, and won't always work (the system might not allow es_ES as a locale, for instance); you should instead create your own transcoder for the desired locale and use its transcodeFrom/transcodeTo methods.

Having said this, your code could work if:
- you have executed runConfigure with the option -t IconvGNU 
- you replace the setlocale(LC_ALL, "es_ES") with setenv("LC_ALL","es_ES",1)



> Problem with acute accent unicode Latin character
> -------------------------------------------------
>
>          Key: XERCESC-1571
>          URL: http://issues.apache.org/jira/browse/XERCESC-1571
>      Project: Xerces-C++
>         Type: Bug
>   Components: Miscellaneous
>     Versions: 2.7.0
>  Environment: uname -a: Linux aries 2.6.15.4-Abysal #1 SMP Wed Feb 22 17:28:51 CET 2006 i686 GNU/Linux 
>     Reporter: jose
>     Priority: Minor

>
> The small example program below demonstrates the problem: 
> --------------------------------------------------------------------------------------------------------------------- 
> #include <stdio.h> 
> #include <locale.h> 
> #include <xercesc/util/PlatformUtils.hpp> 
> #include <xercesc/util/XMLString.hpp> 
> #include <xercesc/dom/DOM.hpp> 
> #include <xercesc/framework/MemBufInputSource.hpp> 
> #include <xercesc/parsers/XercesDOMParser.hpp> 
> #include <xercesc/framework/StdOutFormatTarget.hpp> 
> const XMLCh pCore[] = { 'C', 'o', 'r', 'e', '\0' }; 
> const XMLCh pRoot[] = { 'r', 'o', 'o', 't', '\0' }; 
> const XMLCh pVers[] = { '1', '.', '0', '\0' }; 
> const XMLCh pEnc [] = { 'I', 'S', 'O', '-', '8', '8', '5', '9', '-', '1', '\0' }; 
> const XMLCh pLS [] = { 'L', 'S', '\0' }; 
> const XMLCh pLF [] = { '\n', '\0' }; 
> using namespace xercesc; 
> void printf_xml( const DOMDocument *doc ) 
> { 
>    DOMImplementation *irl = DOMImplementationRegistry::getDOMImplementation( pLS ); 
>    DOMWriter *theSerializer = ((DOMImplementationLS *)irl)->createDOMWriter(); 
>    theSerializer->setEncoding( pEnc ); 
>    XMLFormatTarget *xmlft = new StdOutFormatTarget(); 
>    theSerializer->writeNode( xmlft, *doc ); 
>    delete xmlft; 
>    delete theSerializer; 
> } 
> int main( int argc, char *argv[] ) 
> { 
>    setlocale( LC_ALL, "es_ES@euro" ); 
>    XMLPlatformUtils::Initialize( "es_ES" ); 
>    { 
>       DOMImplementation *irc = DOMImplementationRegistry::getDOMImplementation( pCore ); 
>       DOMDocument *doc = irc->createDocument( 0, 0, 0 ); 
>       doc->setVersion( pVers ); 
>       doc->setEncoding( pEnc ); 
>       DOMElement *de = doc->createElement( pRoot ); 
>       static XMLCh *pValue = XMLString::transcode( "1.234´56" ); 
>       DOMText *dt = doc->createTextNode( pValue ); 
>       XMLString::release( &pValue ); 
>       de->appendChild( dt ); 
>       doc->appendChild( de ); 
>       printf_xml( doc ); 
>       doc->release(); 
>    } 
>    XMLPlatformUtils::Terminate(); 
>    return 0; 
> } 
> --------------------------------------------------------------------------------------------------------------------- 
> Keep close attention to the XMLString::transcode() call. 
> The value "1.234´56" is passed using the "ACUTE ACCENT" between characters '4' and '5'. 
> When the program is run, the result is as follows: 
> <?xml version="1.0" encoding="ISO-8859-1" standalone="no" ?><root>1.234&#x17D;56</root> 
> Why does the unicode character &#x17D; appear instead of &#180;? 
> Character &#x17D; corresponds to "LATIN CAPITAL LETTER Z WITH CARON" 
> and not to "ACUTE ACCENT" that must be &#180; on unicode.

-- 
This message is automatically generated by JIRA.
-
If you think it was sent incorrectly contact one of the administrators:
   http://issues.apache.org/jira/secure/Administrators.jspa
-
For more information on JIRA, see:
   http://www.atlassian.com/software/jira


---------------------------------------------------------------------
To unsubscribe, e-mail: c-dev-unsubscribe@xerces.apache.org
For additional commands, e-mail: c-dev-help@xerces.apache.org