You are viewing a plain text version of this content. The canonical link for it is here.
Posted to c-dev@xerces.apache.org by Mark Everline <ma...@level8.com> on 2001/02/12 20:23:24 UTC
Core when UTF-16 encoding Controdicts actual encoding.....
Hi,
In any message where the declared encoding contradicts the actual encoding,
the parser core dumps because XMLReader::setEncoding(...) deletes
fEncodingStr before the error check, leaving the variable in an
invalid state.
Problem: I have a customer who decided to do the following:
<?xml version="1.0" encoding="UTF-16"?>
The only problem is that the actual encoding is UTF-8. When the parser reads
in the file you get the following warning:
Warning at file
"/home/ox3/snap/everline/snap/sassaby/snap/s/tsto/xml/input/bad/Tina_Koslovsky.xml",
line 1, column 40
Message: Encoding (UTF-16, from XMLDecl or manually set) contradicts
the auto-sensed encoding, ignoring it
The parser then later core dumps when trying to set the Transcoder,
because fEncodingStr is 0. I have attached the xml file and the fix to
XMLReader.
Not sure whether this is a bug/problem or not, mainly because the XML is
questionable in the first place. Also, I am using the DOMParser.
Mark.
Changes To XMLReader.cpp: ( marked with // mee )
bool XMLReader::setEncoding(const XMLCh* const newEncoding)
{
//
// If the encoding was forced, then we ignore the new value and
just
// return with success. If it was forced, then we are to use that
// encoding without question. Note that, if we are forced, we
created
// a transcoder up front so there is no need to do one here in that
// case.
//
if (fForcedEncoding)
return true;
// Clean up the old encoding string
// mee - don't delete until we known we have a good encoding.
// if (fEncodingStr)
// {
// delete [] fEncodingStr;
// fEncodingStr = 0;
// }
//
// Try to map the string to one of our standard encodings. If its
not
// one of them, then it has to be one of the non-intrinsic
encodings,
// in which case we have to delete our intrinsic encoder and create
a
// new one.
//
XMLRecognizer::Encodings newBaseEncoding =
XMLRecognizer::encodingForName
(
newEncoding
);
//
// If it does not come back as one of the auto-sensed encodings,
then we
// have to possibly replace it and at least check a few things.
//
if (newBaseEncoding == XMLRecognizer::OtherEncoding)
{
//
// Check for non-endian specific UTF-16 or UCS-4. If so, and if
we
// are already in one of the endian versions of those
encodings,
// then just keep it and go on. Otherwise, its not valid.
//
if (!XMLString::compareIString(newEncoding,
XMLUni::fgUTF16EncodingString)
|| !XMLString::compareIString(newEncoding,
XMLUni::fgUTF16EncodingString2)
|| !XMLString::compareIString(newEncoding,
XMLUni::fgUTF16EncodingString3)
|| !XMLString::compareIString(newEncoding,
XMLUni::fgUTF16EncodingString4))
{
if ((fEncoding != XMLRecognizer::UTF_16L)
&& (fEncoding != XMLRecognizer::UTF_16B))
{
return false;
}
// Override with the original endian specific encoding
newBaseEncoding = fEncoding;
if (fEncoding == XMLRecognizer::UTF_16L) {
delete [] fEncodingStr; // mee
fEncodingStr =
XMLString::replicate(XMLUni::fgUTF16LEncodingString);
}
else {
delete [] fEncodingStr; // mee
fEncodingStr =
XMLString::replicate(XMLUni::fgUTF16BEncodingString);
}
}
else if (!XMLString::compareIString(newEncoding,
XMLUni::fgUCS4EncodingString)
|| !XMLString::compareIString(newEncoding,
XMLUni::fgUCS4EncodingString2)
|| !XMLString::compareIString(newEncoding,
XMLUni::fgUCS4EncodingString3))
{
if ((fEncoding != XMLRecognizer::UCS_4L)
&& (fEncoding != XMLRecognizer::UCS_4B))
{
return false;
}
// Override with the original endian specific encoding
newBaseEncoding = fEncoding;
if (fEncoding == XMLRecognizer::UCS_4L) {
delete [] fEncodingStr; // mee
fEncodingStr =
XMLString::replicate(XMLUni::fgUCS4LEncodingString);
}
else {
delete [] fEncodingStr; // mee
fEncodingStr =
XMLString::replicate(XMLUni::fgUCS4BEncodingString);
}
}
else
{
// None of those special cases, so just replicate the new
name
delete [] fEncodingStr; // mee
fEncodingStr = XMLString::replicate(newEncoding);
}
}
else
{
// Store the new encoding string since it is just an intrinsic
delete [] fEncodingStr; // mee
fEncodingStr = XMLString::replicate(newEncoding);
}
//
// Now we can create a transcoder using the transcoding service. We
// might get back a transcoder for an intrinsically supported
encoding,
// or we might get one from the underlying transcoding service.
//
XMLTransService::Codes failReason;
fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
(
fEncodingStr
, failReason
, kCharBufSize
);
if (!fTranscoder)
ThrowXML1(TranscodingException,
XMLExcepts::Trans_CantCreateCvtrFor, fEncodingStr);
// Update the base encoding member with the new base encoding found
fEncoding = newBaseEncoding;
// Looks ok to us
return true;
}
Test .xml File: (and yes, I know there are other problems with the XML)
<?xml version="1.0" encoding="UTF-16"?>
<!DOCTYPE OBJECTS [
<!ELEMENT OBJECTS ( CUSTOMER|ORDER)*>
<!ELEMENT CUSTOMER EMPTY >
<!ATTLIST CUSTOMER
Orders IDREFS #IMPLIED
Id CDATA #IMPLIED
FirstName CDATA #IMPLIED
LastName CDATA #IMPLIED
CompanyName CDATA #IMPLIED
__objName ID #IMPLIED>
<!ELEMENT ORDER EMPTY >
<!ATTLIST ORDER
Id CDATA #IMPLIED
ArticleName CDATA #IMPLIED
Amount CDATA #IMPLIED
__objName ID #IMPLIED>
]>
<OBJECTS>
<CUSTOMER
__objName="CUSTOMER__"
Orders="ORDER__1 ORDER__2 "
Id="3"
FirstName="Tina"
LastName="Koslovsky"
CompanyName="Irgendwo GmbH&oKG"/>
<ORDER
__objName="ORDER__1"
Id="6"
ArticleName="Fruchtquark"
Amount="45"/>
<ORDER
__objName="ORDER__2"
Id="7"
ArticleName="Romadour"
Amount="20"/>
</OBJECTS>
--
#-------------------------------------------------------------------------
# Phone: 703-925-1487(w) # 703-729-4463(h)
# email: mark.everline@level8.com (w) # meverline@loudoun.com (h)
#-------------------------------------------------------------------------
#
# Any day you get to breath underwater is a good day.
# - A Scuba Divers motto
#
#-------------------------------------------------------------------------
Re: Core when UTF-16 encoding Controdicts actual encoding.....
Posted by Khaled Noaman <kn...@ca.ibm.com>.
Mark,
The fix is now in CVS. Thanks
Khaled
Mark Everline wrote:
> Hi,
>
> In any message when the Encoding controdicts the actuall encoding the
> the parser core dumps because the XMLReader::setEncoding(...) deletes
> the fEncodingStr before error check and leaving the variable in a
> invalid
> state.
>
> Problem: I have a customer whom decicded to do the following:
> <?xml version="1.0" encoding="UTF-16"?>
>
> the only problem is the actuall encoding is UTF-8. When the parser reads
> in the file you get the following warning:
>
> Warning at file
> "/home/ox3/snap/everline/snap/sassaby/snap/s/tsto/xml/input/bad/Tina_Koslovsky.xml",
> line 1, column 40
> Message: Encoding (UTF-16, from XMLDecl or manually set) contradicts
> the auto-sensed encoding, ignoring it
>
> the parser then latter core dumps when trieing to set the Transcoder
> because
> the fEncodingStr is 0. I have attached the xml file and the fix to
> XMLReader.
> Not sure weather this is a bug/problem or not mainly because the XML is
> questionable in the first place. Also I am using the DOMParser.
>
>
>
> Mark.
>
> Changes To XMLReader.cpp: ( marked with // mee )
>
> bool XMLReader::setEncoding(const XMLCh* const newEncoding)
> {
> //
> // If the encoding was forced, then we ignore the new value and
> just
> // return with success. If it was forced, then we are to use that
> // encoding without question. Note that, if we are forced, we
> created
> // a transcoder up front so there is no need to do one here in that
> // case.
> //
> if (fForcedEncoding)
> return true;
>
> // Clean up the old encoding string
> // mee - don't delete until we known we have a good encoding.
> // if (fEncodingStr)
> // {
> // delete [] fEncodingStr;
> // fEncodingStr = 0;
> // }
>
> //
> // Try to map the string to one of our standard encodings. If its
> not
> // one of them, then it has to be one of the non-intrinsic
> encodings,
> // in which case we have to delete our intrinsic encoder and create
> a
> // new one.
> //
> XMLRecognizer::Encodings newBaseEncoding =
> XMLRecognizer::encodingForName
> (
> newEncoding
> );
>
> //
> // If it does not come back as one of the auto-sensed encodings,
> then we
> // have to possibly replace it and at least check a few things.
> //
> if (newBaseEncoding == XMLRecognizer::OtherEncoding)
> {
> //
> // Check for non-endian specific UTF-16 or UCS-4. If so, and if
> we
> // are already in one of the endian versions of those
> encodings,
> // then just keep it and go on. Otherwise, its not valid.
> //
> if (!XMLString::compareIString(newEncoding,
> XMLUni::fgUTF16EncodingString)
> || !XMLString::compareIString(newEncoding,
> XMLUni::fgUTF16EncodingString2)
> || !XMLString::compareIString(newEncoding,
> XMLUni::fgUTF16EncodingString3)
> || !XMLString::compareIString(newEncoding,
> XMLUni::fgUTF16EncodingString4))
> {
> if ((fEncoding != XMLRecognizer::UTF_16L)
> && (fEncoding != XMLRecognizer::UTF_16B))
> {
> return false;
> }
>
> // Override with the original endian specific encoding
> newBaseEncoding = fEncoding;
>
> if (fEncoding == XMLRecognizer::UTF_16L) {
> delete [] fEncodingStr; // mee
> fEncodingStr =
> XMLString::replicate(XMLUni::fgUTF16LEncodingString);
> }
> else {
> delete [] fEncodingStr; // mee
> fEncodingStr =
> XMLString::replicate(XMLUni::fgUTF16BEncodingString);
> }
> }
> else if (!XMLString::compareIString(newEncoding,
> XMLUni::fgUCS4EncodingString)
> || !XMLString::compareIString(newEncoding,
> XMLUni::fgUCS4EncodingString2)
> || !XMLString::compareIString(newEncoding,
> XMLUni::fgUCS4EncodingString3))
> {
> if ((fEncoding != XMLRecognizer::UCS_4L)
> && (fEncoding != XMLRecognizer::UCS_4B))
> {
> return false;
> }
>
> // Override with the original endian specific encoding
> newBaseEncoding = fEncoding;
>
> if (fEncoding == XMLRecognizer::UCS_4L) {
> delete [] fEncodingStr; // mee
> fEncodingStr =
> XMLString::replicate(XMLUni::fgUCS4LEncodingString);
> }
> else {
> delete [] fEncodingStr; // mee
> fEncodingStr =
> XMLString::replicate(XMLUni::fgUCS4BEncodingString);
> }
> }
> else
> {
> // None of those special cases, so just replicate the new
> name
> delete [] fEncodingStr; // mee
> fEncodingStr = XMLString::replicate(newEncoding);
> }
> }
> else
> {
> // Store the new encoding string since it is just an intrinsic
> delete [] fEncodingStr; // mee
> fEncodingStr = XMLString::replicate(newEncoding);
> }
>
> //
> // Now we can create a transcoder using the transcoding service. We
> // might get back a transcoder for an intrinsically supported
> encoding,
> // or we might get one from the underlying transcoding service.
> //
> XMLTransService::Codes failReason;
> fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
> (
> fEncodingStr
> , failReason
> , kCharBufSize
> );
>
> if (!fTranscoder)
> ThrowXML1(TranscodingException,
> XMLExcepts::Trans_CantCreateCvtrFor, fEncodingStr);
>
> // Update the base encoding member with the new base encoding found
> fEncoding = newBaseEncoding;
>
> // Looks ok to us
> return true;
> }
>
> Test .xml File: (and yes I known there are other problems with the XML)
>
> <?xml version="1.0" encoding="UTF-16"?>
>
> <!DOCTYPE OBJECTS [
> <!ELEMENT OBJECTS ( CUSTOMER|ORDER)*>
>
> <!ELEMENT CUSTOMER EMPTY >
>
> <!ATTLIST CUSTOMER
> Orders IDREFS #IMPLIED
> Id CDATA #IMPLIED
> FirstName CDATA #IMPLIED
> LastName CDATA #IMPLIED
> CompanyName CDATA #IMPLIED
> __objName ID #IMPLIED>
>
> <!ELEMENT ORDER EMPTY >
>
> <!ATTLIST ORDER
> Id CDATA #IMPLIED
> ArticleName CDATA #IMPLIED
> Amount CDATA #IMPLIED
> __objName ID #IMPLIED>
>
> ]>
>
> <OBJECTS>
> <CUSTOMER
> __objName="CUSTOMER__"
> Orders="ORDER__1 ORDER__2 "
> Id="3"
> FirstName="Tina"
> LastName="Koslovsky"
> CompanyName="Irgendwo GmbH&oKG"/>
> <ORDER
> __objName="ORDER__1"
> Id="6"
> ArticleName="Fruchtquark"
> Amount="45"/>
> <ORDER
> __objName="ORDER__2"
> Id="7"
> ArticleName="Romadour"
> Amount="20"/>
> </OBJECTS>
>
> --
> #-------------------------------------------------------------------------
> # Phone: 703-925-1487(w) # 703-729-4463(h)
> # email: mark.everline@level8.com (w) # meverline@loudoun.com (h)
> #-------------------------------------------------------------------------
> #
> # Any day you get to breath underwater is a good day.
> # - A Scuba Divers motto
> #
> #-------------------------------------------------------------------------
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: xerces-c-dev-unsubscribe@xml.apache.org
> For additional commands, e-mail: xerces-c-dev-help@xml.apache.org