You are viewing a plain text version of this content. The canonical link for it is here.
Posted to c-dev@xerces.apache.org by "Alberto Massari (JIRA)" <xe...@xml.apache.org> on 2014/04/30 10:23:15 UTC

[jira] [Resolved] (XERCESC-2030) failed to do validation when there's Japanese words in the xml file

     [ https://issues.apache.org/jira/browse/XERCESC-2030?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Alberto Massari resolved XERCESC-2030.
--------------------------------------

    Resolution: Not a Problem

Your code is doing a dangerous thing: calling XMLString::transcode in the SAX callbacks. This tries to convert a Unicode string into the encoding of the current locale, which you cannot control and which, in your case, is unable to represent non-European characters. If you really need to store non-Unicode strings in a stack, please convert them to UTF-8, and never use XMLString::transcode unless you are preparing to print data to the console.

> failed to do validation when there's Japanese words in the xml file
> -------------------------------------------------------------------
>
>                 Key: XERCESC-2030
>                 URL: https://issues.apache.org/jira/browse/XERCESC-2030
>             Project: Xerces-C++
>          Issue Type: Bug
>          Components: SAX/SAX2
>         Environment: SunOS 5.10 Generic_139555-08 sun4u sparc SUNW,Sun-Fire-V245
> xerces C++ 3.1.1
>            Reporter: ocean_helen
>
> Hi owners,
>      I ran into a problem when using Xerces C++ 3.1.1 to do schema validation of an XML file that contains Japanese words. It raised "FatalError: invalid multi-byte sequence" and stopped validation.
>      Environment: Linux
>      Locale:
>         LANG=
>         LC_CTYPE=en_GB.ISO8859-1
>         LC_NUMERIC=C
>         LC_TIME=en_GB.ISO8859-1
>         LC_COLLATE=en_GB.ISO8859-1
>         LC_MONETARY=en_GB.ISO8859-1
>         LC_MESSAGES=C
>         LC_ALL=
>     The XML file is generated on Linux and, for business reasons, we cannot change the character set from ISO8859-1 to UTF-8 on the system side. Is there any workaround to skip this kind of error, or is it possible to change the character set in C++ so that the validation passes?
>      All the source code is attached below; please let me know if you need any more information.
>      Looking forward to your reply and thank you so much in advance.
> Source Code: 
> a.xsd:
> ============================================================
> <?xml version="1.0" encoding="UTF-8"?>
> <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
>  <xs:element name="phonebook">
>     <xs:complexType>
>       <xs:sequence>
>         <xs:element name="name" minOccurs="1" maxOccurs="1">
>           <xs:complexType>
>             <xs:sequence>
>               <xs:element name="first" type="xs:string"/>
>             </xs:sequence>
>           </xs:complexType>
>         </xs:element>
>       </xs:sequence>
>     </xs:complexType>
>  </xs:element>
> </xs:schema>
> a.xml:
> ============================================================
> <?xml version="1.0" encoding="UTF-8"?>
> <phonebook xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
> xsi:noNamespaceSchemaLocation=
> "gobitan.xsd">
>  <name>
>     <first>円短期</first>
> </name>
> </phonebook>
> val.cpp
> ============================================================
> #include <xercesc/util/PlatformUtils.hpp>
> #include <xercesc/validators/common/Grammar.hpp>
> #include <xercesc/sax2/SAX2XMLReader.hpp>
> #include <xercesc/util/XMLException.hpp>
> #include <xercesc/util/OutOfMemoryException.hpp>
> #include <xercesc/util/XMLString.hpp>
> #include <xercesc/sax2/XMLReaderFactory.hpp>
> #include <stdio.h>
> #include "MyHandler.hpp"
> #if defined(XERCES_NEW_IOSTREAMS)
> #include <iostream>
> #else
> #include <iostream.h>
> #endif
> using namespace std;
> using namespace xercesc;
> //XERCES_CPP_NAMESPACE_USE
> int main( int argc , char** argv )
> {
>        XMLPlatformUtils::Initialize(); //.....
>        SAX2XMLReader* parser = XMLReaderFactory::createXMLReader();
>        parser->setFeature(XMLUni::fgSAX2CoreNameSpaces, true);
>     parser->setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, true);
>        parser->setFeature(XMLUni::fgXercesValidationErrorAsFatal, true);
>        parser->setFeature(XMLUni::fgSAX2CoreValidation, true);
>     parser->setFeature(XMLUni::fgXercesSchema, true);
>     parser->setFeature(XMLUni::fgXercesSchemaFullChecking, true);
> parser->setFeature(XMLUni::fgXercesLoadSchema,true);
> parser->setExitOnFirstFatalError(false);
>  parser->loadGrammar ("a.xsd", Grammar::SchemaGrammarType, true);
> MyHandler* handler=new MyHandler();
>         parser->setContentHandler(handler);
> parser->setErrorHandler(handler);
>            try
>            {
>        parser->parse("a.xml");
> vector<string> errs=handler->getSchemaErrorContent();
> if(errs.size()>0)
> {
> cout<<"ERROR MESSAGE OF SCHEMA VALIDATION============="<<endl;
> for (unsigned int i = 0; i < errs.size();i++)
> {
> cout<<errs.at(i)<<endl;
> }
> }
> cout<<"END TRY"<<endl;
>  }
>         catch (const XMLException& toCatch) {
>             char* message = XMLString::transcode(toCatch.getMessage());
>             cout << "Exception message is: \n"
>                  << message << "\n";
>             XMLString::release(&message);
>             return -1;
>         }
>         catch (const SAXParseException& toCatch) {
>             char* message = XMLString::transcode(toCatch.getMessage());
>             cout << "Exception message is: \n"
>                  << message << "\n";
>             XMLString::release(&message);
>             return -1;
>         }
>         catch (...) {
>             cout << "Unexpected Exception \n" ;
>             return -1;
>         }
>                 cout<<"FINISH"<<endl;
>        XMLPlatformUtils::Terminate();
>        return 0;
> }
> MyHandler.cpp
> ============================================================
> #include "MyHandler.hpp"
> #include <xercesc/sax2/Attributes.hpp>
> #include <xercesc/sax/SAXParseException.hpp>
> #include <xercesc/sax/SAXException.hpp>
> #if defined(XERCES_NEW_IOSTREAMS)
> #include <iostream>
> #else
> #include <iostream.h>
> #endif
> // ---------------------------------------------------------------------------
> //  MyHandler: Constructors and Destructor
> // ---------------------------------------------------------------------------
> MyHandler::MyHandler() :
> fAttrCount(0)
>         , fCharacterCount(0)
>         , fElementCount(0)
>         , fSpaceCount(0)
>         , fSchemaErrors(false)
>         , fSystemException(false)
>         , eleName("")
>         , eleValue("")
>         , curElement("")
>         , curValue("")
>         , buf("")
> {
> }
> MyHandler::~MyHandler()
> {
> }
> // ---------------------------------------------------------------------------
> //  MyHandler: Implementation of the SAX DocumentHandler interface
> // ---------------------------------------------------------------------------
> void MyHandler::startElement(const XMLCh* const  uri
>         , const XMLCh* const  localname
>         , const XMLCh* const  qname
>         , const Attributes& attrs)
> {
>         curValue = "";
>         curElement="";
>         curElement=XMLString::transcode(localname);
>         elementList.push_back(curElement);
>         fElementCount++;
>         fAttrCount += attrs.getLength();
> }
> void MyHandler::endElement( const XMLCh* const uri
>         , const XMLCh* const localname
>         , const XMLCh* const qname)
> {
>         curElement = XMLString::transcode(localname);
>         elementList.remove(curElement);
>         }
> void MyHandler::characters(  const   XMLCh* const chars
>         , const XMLSize_t length)
> {
>         fCharacterCount += length;
>         curValue = StrUtil(chars);
> }
> void MyHandler::ignorableWhitespace( const   XMLCh* const /* chars */
>         , const XMLSize_t length)
> {
>         fSpaceCount += length;
> }
> void MyHandler::startDocument()
> {
>         fAttrCount = 0;
>         fCharacterCount = 0;
>         fElementCount = 0;
>         fSpaceCount = 0;
>         eleName="";
>         eleValue="";
>         curElement="";
>         curValue="";
>         elementList.clear();
>         cout<<"Start to Parse File*****"<<endl;
> }
> void MyHandler::endDocument()
> {
>         cout<<"Finish Parse File*****"<<endl;
> }
> // ---------------------------------------------------------------------------
> //  MyHandler: Overrides of the SAX ErrorHandler interface
> // ---------------------------------------------------------------------------
> void MyHandler::error(const SAXParseException& e)
> {
> string tmp;
>  string message = StrUtil(e.getMessage());
>     tmp.append( "Error: " +message);
>          tmp.append( " curElement = [" + curElement + "] element, curValue = ["+ curValue+ "].");
> vSchemaErrorContent.push_back(tmp);
> cout<<"ERROR======================== msg = ["<<tmp<<"]."<<endl;
> }
> void MyHandler::fatalError(const SAXParseException& e)
> {
>         fSchemaErrors = true;
>  char* message = XMLString::transcode(e.getMessage());
>     cout << "Fatal Error: " << message << " at line: " << e.getLineNumber()<< endl;
> cout<<"FATAL ERROR============================ msg = ["<<message<<"]."<<endl;
>  XMLString::release(&message);
> }
> void MyHandler::warning(const SAXParseException& e)
> {
>  char* message = XMLString::transcode(e.getMessage());
>     cout << "Warning : " << message<< " at line: " << e.getLineNumber()<< endl;
>     XMLString::release(&message);
> }
> void MyHandler::resetErrors()
> {
>         fSchemaErrors = false;
>         fSystemException = false;
>         vSchemaErrorContent.clear();
>         vSystemErrorContent.clear();
> }
> MyHandler.hpp
> ============================================================
> #include <xercesc/sax2/Attributes.hpp>
> #include <xercesc/sax2/DefaultHandler.hpp>
> #include <string>
> #include <vector>
> #include <list>
> #include <sstream>
> using namespace std;
> XERCES_CPP_NAMESPACE_USE
> class MyHandler : public DefaultHandler
> {
> public:
>         // -----------------------------------------------------------------------
>         //  Constructors and Destructor
>         // -----------------------------------------------------------------------
>         MyHandler();
>         ~MyHandler();
>         // -----------------------------------------------------------------------
>         //  Getter methods
>         // -----------------------------------------------------------------------
>         XMLSize_t getElementCount() const
>         {
>                 return fElementCount;
>         }
>         XMLSize_t getAttrCount() const
>         {
>                 return fAttrCount;
>         }
>         XMLSize_t getCharacterCount() const
>         {
>                 return fCharacterCount;
>         }
>         XMLSize_t getSpaceCount() const
>         {
>                 return fSpaceCount;
>         }
>         bool            hasfSchemaErrors() const{
>                 return fSchemaErrors;
>         }
>         bool            hasfSystemException() const{
>                 return fSystemException;
>         }
>         vector<string>  getSchemaErrorContent() const {
>                 return vSchemaErrorContent;
>         }
>         vector<string>  getSystemErrorContent() const {
>                 return vSystemErrorContent;
>         }
>         void startElement(const XMLCh* const uri, const XMLCh* const localname, const XMLCh* const qname, const Attributes& attrs);
> void endElement(const XMLCh* const uri,const XMLCh* const localname,const XMLCh* const qname ) ;
>         void characters(const XMLCh* const chars, const XMLSize_t length);
>         void ignorableWhitespace(const XMLCh* const chars, const XMLSize_t length);
>         void startDocument();
>         void endDocument();
>         void warning(const SAXParseException& exc);
>         void error(const SAXParseException& exc);
>         void fatalError(const SAXParseException& exc);
>         void resetErrors();
> private:
>         XMLSize_t     fAttrCount;
>         XMLSize_t     fCharacterCount;
>         XMLSize_t     fElementCount;
>         XMLSize_t               fSpaceCount;
>         bool            fSchemaErrors;
>         bool            fSystemException;
>         vector<string>                  vSchemaErrorContent;
>         vector<string>                  vSystemErrorContent;
>         string curElement;
>         string curValue;
>         list<string>    elementList;
> };



--
This message was sent by Atlassian JIRA
(v6.2#6252)

---------------------------------------------------------------------
To unsubscribe, e-mail: c-dev-unsubscribe@xerces.apache.org
For additional commands, e-mail: c-dev-help@xerces.apache.org