You are viewing a plain text version of this content. The canonical link for it is here.
Posted to c-dev@xerces.apache.org by "ocean_helen (JIRA)" <xe...@xml.apache.org> on 2014/04/30 10:33:14 UTC
[jira] [Commented] (XERCESC-2030) failed to do validation when
there's Japanese words in the xml file
[ https://issues.apache.org/jira/browse/XERCESC-2030?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13985275#comment-13985275 ]
ocean_helen commented on XERCESC-2030:
--------------------------------------
Thanks for your quick response!
The XML file is encoded in UTF-8, and I can read it successfully in Notepad++ on Windows.
Yes, we need to store non-Unicode strings in the XML file, but we don't need to print the data to the console. Could you show me an example of how to convert it to UTF-8 when doing schema validation? I couldn't find any sample code on the website... Thanks again.
> failed to do validation when there's Japanese words in the xml file
> -------------------------------------------------------------------
>
> Key: XERCESC-2030
> URL: https://issues.apache.org/jira/browse/XERCESC-2030
> Project: Xerces-C++
> Issue Type: Bug
> Components: SAX/SAX2
> Environment: SunOS 5.10 Generic_139555-08 sun4u sparc SUNW,Sun-Fire-V245
> xerces C++ 3.1.1
> Reporter: ocean_helen
>
> Hi owners,
> I ran into a problem when using Xerces-C++ 3.1.1 to do schema validation on an XML file that contains Japanese words. It raised "FatalError: invalid multi-byte sequence" and stopped validation.
> Environment: Linux
> Locale:
> LANG=
> LC_CTYPE=en_GB.ISO8859-1
> LC_NUMERIC=C
> LC_TIME=en_GB.ISO8859-1
> LC_COLLATE=en_GB.ISO8859-1
> LC_MONETARY=en_GB.ISO8859-1
> LC_MESSAGES=C
> LC_ALL=
> The XML file is generated on Linux and, for business reasons, we cannot change the system character set from ISO8859-1 to UTF-8. Is there a workaround to avoid this kind of error, or is it possible to change the character set in C++ so that the validation passes?
> All the source code is attached below; please let me know if you need any more information.
> Looking forward to your reply and thank you so much in advance.
> Source Code:
> a.xsd:
> ============================================================
> <?xml version="1.0" encoding="UTF-8"?>
> <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
> <xs:element name="phonebook">
> <xs:complexType>
> <xs:sequence>
> <xs:element name="name" minOccurs="1" maxOccurs="1">
> <xs:complexType>
> <xs:sequence>
> <xs:element name="first" type="xs:string"/>
> </xs:sequence>
> </xs:complexType>
> </xs:element>
> </xs:sequence>
> </xs:complexType>
> </xs:element>
> </xs:schema>
> a.xml:
> ============================================================
> <?xml version="1.0" encoding="UTF-8"?>
> <phonebook xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
> xsi:noNamespaceSchemaLocation=
> "gobitan.xsd">
> <name>
> <first>円短期</first>
> </name>
> </phonebook>
> val.cpp
> ============================================================
> #include <xercesc/util/PlatformUtils.hpp>
> #include <xercesc/validators/common/Grammar.hpp>
> #include <xercesc/sax2/SAX2XMLReader.hpp>
> #include <xercesc/util/XMLException.hpp>
> #include <xercesc/util/OutOfMemoryException.hpp>
> #include <xercesc/util/XMLString.hpp>
> #include <xercesc/sax2/XMLReaderFactory.hpp>
> #include <stdio.h>
> #include "MyHandler.hpp"
> #if defined(XERCES_NEW_IOSTREAMS)
> #include <iostream>
> #else
> #include <iostream.h>
> #endif
> using namespace std;
> using namespace xercesc;
> //XERCES_CPP_NAMESPACE_USE
> int main( int argc , char** argv )
> {
> XMLPlatformUtils::Initialize(); //.....
> SAX2XMLReader* parser = XMLReaderFactory::createXMLReader();
> parser->setFeature(XMLUni::fgSAX2CoreNameSpaces, true);
> parser->setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, true);
> parser->setFeature(XMLUni::fgXercesValidationErrorAsFatal, true);
> parser->setFeature(XMLUni::fgSAX2CoreValidation, true);
> parser->setFeature(XMLUni::fgXercesSchema, true);
> parser->setFeature(XMLUni::fgXercesSchemaFullChecking, true);
> parser->setFeature(XMLUni::fgXercesLoadSchema,true);
> parser->setExitOnFirstFatalError(false);
> parser->loadGrammar ("a.xsd", Grammar::SchemaGrammarType, true);
> MyHandler* handler=new MyHandler();
> parser->setContentHandler(handler);
> parser->setErrorHandler(handler);
> try
> {
> parser->parse("a.xml");
> vector<string> errs=handler->getSchemaErrorContent();
> if(errs.size()>0)
> {
> cout<<"ERROR MESSAGE OF SCHEMA VALIDATION============="<<endl;
> for (unsigned int i = 0; i < errs.size();i++)
> {
> cout<<errs.at(i)<<endl;
> }
> }
> cout<<"END TRY"<<endl;
> }
> catch (const XMLException& toCatch) {
> char* message = XMLString::transcode(toCatch.getMessage());
> cout << "Exception message is: \n"
> << message << "\n";
> XMLString::release(&message);
> return -1;
> }
> catch (const SAXParseException& toCatch) {
> char* message = XMLString::transcode(toCatch.getMessage());
> cout << "Exception message is: \n"
> << message << "\n";
> XMLString::release(&message);
> return -1;
> }
> catch (...) {
> cout << "Unexpected Exception \n" ;
> return -1;
> }
> cout<<"FINISH"<<endl;
> XMLPlatformUtils::Terminate();
> return 0;
> }
> MyHandler.cpp
> ============================================================
> #include "MyHandler.hpp"
> #include <xercesc/sax2/Attributes.hpp>
> #include <xercesc/sax/SAXParseException.hpp>
> #include <xercesc/sax/SAXException.hpp>
> #if defined(XERCES_NEW_IOSTREAMS)
> #include <iostream>
> #else
> #include <iostream.h>
> #endif
> // ---------------------------------------------------------------------------
> // MyHandler: Constructors and Destructor
> // ---------------------------------------------------------------------------
> MyHandler::MyHandler() :
> fAttrCount(0)
> , fCharacterCount(0)
> , fElementCount(0)
> , fSpaceCount(0)
> , fSchemaErrors(false)
> , fSystemException(false)
> , eleName("")
> , eleValue("")
> , curElement("")
> , curValue("")
> , buf("")
> {
> }
> MyHandler::~MyHandler()
> {
> }
> // ---------------------------------------------------------------------------
> // MyHandler: Implementation of the SAX DocumentHandler interface
> // ---------------------------------------------------------------------------
> void MyHandler::startElement(const XMLCh* const uri
> , const XMLCh* const localname
> , const XMLCh* const qname
> , const Attributes& attrs)
> {
> curValue = "";
> curElement="";
> curElement=XMLString::transcode(localname);
> elementList.push_back(curElement);
> fElementCount++;
> fAttrCount += attrs.getLength();
> }
> void MyHandler::endElement( const XMLCh* const uri
> , const XMLCh* const localname
> , const XMLCh* const qname)
> {
> curElement = XMLString::transcode(localname);
> elementList.remove(curElement);
> }
> void MyHandler::characters( const XMLCh* const chars
> , const XMLSize_t length)
> {
> fCharacterCount += length;
> curValue = StrUtil(chars);
> }
> void MyHandler::ignorableWhitespace( const XMLCh* const /* chars */
> , const XMLSize_t length)
> {
> fSpaceCount += length;
> }
> void MyHandler::startDocument()
> {
> fAttrCount = 0;
> fCharacterCount = 0;
> fElementCount = 0;
> fSpaceCount = 0;
> eleName="";
> eleValue="";
> curElement="";
> curValue="";
> elementList.clear();
> cout<<"Start to Parse File*****"<<endl;
> }
> void MyHandler::endDocument()
> {
> cout<<"Finish Parse File*****"<<endl;
> }
> // ---------------------------------------------------------------------------
> // MyHandler: Overrides of the SAX ErrorHandler interface
> // ---------------------------------------------------------------------------
> void MyHandler::error(const SAXParseException& e)
> {
> string tmp;
> string message = StrUtil(e.getMessage());
> tmp.append( "Error: " +message);
> tmp.append( " curElement = [" + curElement + "] element, curValue = ["+ curValue+ "].");
> vSchemaErrorContent.push_back(tmp);
> cout<<"ERROR======================== msg = ["<<tmp<<"]."<<endl;
> }
> void MyHandler::fatalError(const SAXParseException& e)
> {
> fSchemaErrors = true;
> char* message = XMLString::transcode(e.getMessage());
> cout << "Fatal Error: " << message << " at line: " << e.getLineNumber()<< endl;
> cout<<"FATAL ERROR============================ msg = ["<<message<<"]."<<endl;
> XMLString::release(&message);
> }
> void MyHandler::warning(const SAXParseException& e)
> {
> char* message = XMLString::transcode(e.getMessage());
> cout << "Warning : " << message<< " at line: " << e.getLineNumber()<< endl;
> XMLString::release(&message);
> }
> void MyHandler::resetErrors()
> {
> fSchemaErrors = false;
> fSystemException = false;
> vSchemaErrorContent.clear();
> vSystemErrorContent.clear();
> }
> MyHandler.hpp
> ============================================================
> #include <xercesc/sax2/Attributes.hpp>
> #include <xercesc/sax2/DefaultHandler.hpp>
> #include <string>
> #include <vector>
> #include <list>
> #include <sstream>
> using namespace std;
> XERCES_CPP_NAMESPACE_USE
> class MyHandler : public DefaultHandler
> {
> public:
> // -----------------------------------------------------------------------
> // Constructors and Destructor
> // -----------------------------------------------------------------------
> MyHandler();
> ~MyHandler();
> // -----------------------------------------------------------------------
> // Getter methods
> // -----------------------------------------------------------------------
> XMLSize_t getElementCount() const
> {
> return fElementCount;
> }
> XMLSize_t getAttrCount() const
> {
> return fAttrCount;
> }
> XMLSize_t getCharacterCount() const
> {
> return fCharacterCount;
> }
> XMLSize_t getSpaceCount() const
> {
> return fSpaceCount;
> }
> bool hasfSchemaErrors() const{
> return fSchemaErrors;
> }
> bool hasfSystemException() const{
> return fSystemException;
> }
> vector<string> getSchemaErrorContent() const {
> return vSchemaErrorContent;
> }
> vector<string> getSystemErrorContent() const {
> return vSystemErrorContent;
> }
> void startElement(const XMLCh* const uri, const XMLCh* const localname, const XMLCh* const qname, const Attributes& attrs);
> void endElement(const XMLCh* const uri,const XMLCh* const localname,const XMLCh* const qname ) ;
> void characters(const XMLCh* const chars, const XMLSize_t length);
> void ignorableWhitespace(const XMLCh* const chars, const XMLSize_t length);
> void startDocument();
> void endDocument();
> void warning(const SAXParseException& exc);
> void error(const SAXParseException& exc);
> void fatalError(const SAXParseException& exc);
> void resetErrors();
> private:
> XMLSize_t fAttrCount;
> XMLSize_t fCharacterCount;
> XMLSize_t fElementCount;
> XMLSize_t fSpaceCount;
> bool fSchemaErrors;
> bool fSystemException;
> vector<string> vSchemaErrorContent;
> vector<string> vSystemErrorContent;
> string curElement;
> string curValue;
> list<string> elementList;
> };
--
This message was sent by Atlassian JIRA
(v6.2#6252)
---------------------------------------------------------------------
To unsubscribe, e-mail: c-dev-unsubscribe@xerces.apache.org
For additional commands, e-mail: c-dev-help@xerces.apache.org