You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by am...@apache.org on 2019/12/15 21:18:15 UTC
svn commit: r1871620 - in /xerces/c/trunk: src/xercesc/internal/
tests/src/XSTSHarness/regression/
tests/src/XSTSHarness/regression/XERCESC-2180/
Author: amassari
Date: Sun Dec 15 21:18:15 2019
New Revision: 1871620
URL: http://svn.apache.org/viewvc?rev=1871620&view=rev
Log:
[XERCESC-2180] Remove the assertion that fired when a surrogate pair was split
across the boundary of an input buffer (transcoders try to avoid this, but the
UTF-16 transcoder doesn't have this check in place). The reader now pulls in more data on demand.
Added:
xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-2180/
xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-2180/crash.xml (with props)
xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-2180/crash2.xml (with props)
xerces/c/trunk/tests/src/XSTSHarness/regression/XercesXML.testSet
Modified:
xerces/c/trunk/src/xercesc/internal/XMLReader.cpp
Modified: xerces/c/trunk/src/xercesc/internal/XMLReader.cpp
URL: http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/internal/XMLReader.cpp?rev=1871620&r1=1871619&r2=1871620&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/internal/XMLReader.cpp (original)
+++ xerces/c/trunk/src/xercesc/internal/XMLReader.cpp Sun Dec 15 21:18:15 2019
@@ -646,11 +646,16 @@ bool XMLReader::getName(XMLBuffer& toFil
if (!token)
{
if ((fCharBuf[fCharIndex] >= 0xD800) && (fCharBuf[fCharIndex] <= 0xDB7F)) {
- // make sure one more char is in the buffer, the transcoder
- // should put only a complete surrogate pair into the buffer
- assert(fCharIndex+1 < fCharsAvail);
- if ((fCharBuf[fCharIndex+1] < 0xDC00) || (fCharBuf[fCharIndex+1] > 0xDFFF))
- return false;
+ // if there isn't one more char in the buffer, read more data
+ if (fCharIndex+1 == fCharsAvail)
+ {
+ if (!refreshCharBuffer())
+ return false;
+ // reset the start buffer to the new location of the cursor
+ charIndex_start = fCharIndex;
+ }
+ if ((fCharBuf[fCharIndex+1] < 0xDC00) || (fCharBuf[fCharIndex+1] > 0xDFFF))
+ return false;
// Looks ok, so lets eat it
fCharIndex += 2;
@@ -675,9 +680,21 @@ bool XMLReader::getName(XMLBuffer& toFil
// break out.
if ( (fCharBuf[fCharIndex] >= 0xD800) && (fCharBuf[fCharIndex] <= 0xDB7F) )
{
- // make sure one more char is in the buffer, the transcoder
- // should put only a complete surrogate pair into the buffer
- assert(fCharIndex+1 < fCharsAvail);
+ // if there isn't one more char in the buffer, read more data
+ if (fCharIndex+1 == fCharsAvail)
+ {
+ // but first copy the accepted character(s), and update column
+ if (fCharIndex != charIndex_start)
+ {
+ fCurCol += (XMLFileLoc)(fCharIndex - charIndex_start);
+ toFill.append(&fCharBuf[charIndex_start], fCharIndex - charIndex_start);
+ }
+
+ if (!refreshCharBuffer())
+ break;
+
+ charIndex_start = fCharIndex;
+ }
if ( (fCharBuf[fCharIndex+1] < 0xDC00) ||
(fCharBuf[fCharIndex+1] > 0xDFFF) )
break;
@@ -721,9 +738,14 @@ bool XMLReader::getNCName(XMLBuffer& toF
// what's the point in living mannnn? Just give up now. We only do this
// if its a name and not a name token that they want.
if ((fCharBuf[fCharIndex] >= 0xD800) && (fCharBuf[fCharIndex] <= 0xDB7F)) {
- // make sure one more char is in the buffer, the transcoder
- // should put only a complete surrogate pair into the buffer
- assert(fCharIndex+1 < fCharsAvail);
+ // if there isn't one more char in the buffer, read more data
+ if (fCharIndex+1 == fCharsAvail)
+ {
+ if (!refreshCharBuffer())
+ return false;
+ // reset the start buffer to the new location of the cursor
+ charIndex_start = fCharIndex;
+ }
if ((fCharBuf[fCharIndex+1] < 0xDC00) || (fCharBuf[fCharIndex+1] > 0xDFFF))
return false;
@@ -758,7 +780,28 @@ bool XMLReader::getNCName(XMLBuffer& toF
// Check the current char and take it if it's a name char
while(fCharIndex < fCharsAvail)
{
- if((fCharBuf[fCharIndex] >= 0xD800) && (fCharBuf[fCharIndex] <= 0xDB7F) && fCharIndex+1 < fCharsAvail && ((fCharBuf[fCharIndex+1] < 0xDC00) || (fCharBuf[fCharIndex+1] > 0xDFFF))) fCharIndex+=2;
+ if((fCharBuf[fCharIndex] >= 0xD800) && (fCharBuf[fCharIndex] <= 0xDB7F))
+ {
+ // if there isn't one more char in the buffer, read more data
+ if (fCharIndex+1 == fCharsAvail)
+ {
+ // but first copy the accepted character(s), and update column
+ if (fCharIndex != charIndex_start)
+ {
+ fCurCol += (XMLFileLoc)(fCharIndex - charIndex_start);
+ toFill.append(&fCharBuf[charIndex_start], fCharIndex - charIndex_start);
+ }
+
+ if (!refreshCharBuffer())
+ break;
+
+ charIndex_start = fCharIndex;
+ }
+ if ( (fCharBuf[fCharIndex+1] < 0xDC00) ||
+ (fCharBuf[fCharIndex+1] > 0xDFFF) )
+ break;
+ fCharIndex += 2;
+ }
else if(isNCNameChar(fCharBuf[fCharIndex])) fCharIndex++;
else break;
}
Added: xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-2180/crash.xml
URL: http://svn.apache.org/viewvc/xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-2180/crash.xml?rev=1871620&view=auto
==============================================================================
Binary file - no diff available.
Propchange: xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-2180/crash.xml
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-2180/crash2.xml
URL: http://svn.apache.org/viewvc/xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-2180/crash2.xml?rev=1871620&view=auto
==============================================================================
Binary file - no diff available.
Propchange: xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-2180/crash2.xml
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: xerces/c/trunk/tests/src/XSTSHarness/regression/XercesXML.testSet
URL: http://svn.apache.org/viewvc/xerces/c/trunk/tests/src/XSTSHarness/regression/XercesXML.testSet?rev=1871620&view=auto
==============================================================================
--- xerces/c/trunk/tests/src/XSTSHarness/regression/XercesXML.testSet (added)
+++ xerces/c/trunk/tests/src/XSTSHarness/regression/XercesXML.testSet Sun Dec 15 21:18:15 2019
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="utf-8"?>
+<TESTSUITE>
+ <TESTCASES xml:base="XERCESC-2180">
+ <!-- https://issues.apache.org/jira/browse/XERCESC-2180: Assertion when scanner splits a surrogate pair across two separate buffers -->
+ <TEST ID="XERCESC-2180" TYPE="invalid" URI="crash.xml"/>
+ <TEST ID="XERCESC-2180" TYPE="invalid" URI="crash2.xml"/>
+ </TESTCASES>
+</TESTSUITE>
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@xerces.apache.org
For additional commands, e-mail: commits-help@xerces.apache.org