You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by am...@apache.org on 2019/12/15 21:18:15 UTC

svn commit: r1871620 - in /xerces/c/trunk: src/xercesc/internal/ tests/src/XSTSHarness/regression/ tests/src/XSTSHarness/regression/XERCESC-2180/

Author: amassari
Date: Sun Dec 15 21:18:15 2019
New Revision: 1871620

URL: http://svn.apache.org/viewvc?rev=1871620&view=rev
Log:
[XERCESC-2180] Remove the assertion that fired when a surrogate pair is split by the
boundary of an input buffer (transcoders try to avoid this, but the UTF-16 transcoder
doesn't have this check in place). The reader now pulls in more data on demand.

Added:
    xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-2180/
    xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-2180/crash.xml   (with props)
    xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-2180/crash2.xml   (with props)
    xerces/c/trunk/tests/src/XSTSHarness/regression/XercesXML.testSet
Modified:
    xerces/c/trunk/src/xercesc/internal/XMLReader.cpp

Modified: xerces/c/trunk/src/xercesc/internal/XMLReader.cpp
URL: http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/internal/XMLReader.cpp?rev=1871620&r1=1871619&r2=1871620&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/internal/XMLReader.cpp (original)
+++ xerces/c/trunk/src/xercesc/internal/XMLReader.cpp Sun Dec 15 21:18:15 2019
@@ -646,11 +646,16 @@ bool XMLReader::getName(XMLBuffer& toFil
     if (!token)
     {
         if ((fCharBuf[fCharIndex] >= 0xD800) && (fCharBuf[fCharIndex] <= 0xDB7F)) {
-           // make sure one more char is in the buffer, the transcoder
-           // should put only a complete surrogate pair into the buffer
-           assert(fCharIndex+1 < fCharsAvail);
-           if ((fCharBuf[fCharIndex+1] < 0xDC00) || (fCharBuf[fCharIndex+1] > 0xDFFF))
-               return false;
+            // if there isn't one more char in the buffer, read more data
+            if (fCharIndex+1 == fCharsAvail)
+            {
+                if (!refreshCharBuffer())
+                    return false;
+                // reset the start buffer to the new location of the cursor
+                charIndex_start = fCharIndex;
+            }
+            if ((fCharBuf[fCharIndex+1] < 0xDC00) || (fCharBuf[fCharIndex+1] > 0xDFFF))
+                return false;
 
             // Looks ok, so lets eat it
             fCharIndex += 2;
@@ -675,9 +680,21 @@ bool XMLReader::getName(XMLBuffer& toFil
             //  break out.
             if ( (fCharBuf[fCharIndex] >= 0xD800) && (fCharBuf[fCharIndex] <= 0xDB7F) )
             {
-                // make sure one more char is in the buffer, the transcoder
-                // should put only a complete surrogate pair into the buffer
-                assert(fCharIndex+1 < fCharsAvail);
+                // if there isn't one more char in the buffer, read more data
+                if (fCharIndex+1 == fCharsAvail)
+                {
+                    // but first copy the accepted character(s), and update column
+                    if (fCharIndex != charIndex_start)
+                    {
+                        fCurCol += (XMLFileLoc)(fCharIndex - charIndex_start);
+                        toFill.append(&fCharBuf[charIndex_start], fCharIndex - charIndex_start);
+                    }
+
+                    if (!refreshCharBuffer())
+                        break;
+
+                    charIndex_start = fCharIndex;
+                }
                 if ( (fCharBuf[fCharIndex+1] < 0xDC00) ||
                         (fCharBuf[fCharIndex+1] > 0xDFFF)  )
                     break;
@@ -721,9 +738,14 @@ bool XMLReader::getNCName(XMLBuffer& toF
     //  what's the point in living mannnn? Just give up now. We only do this
     //  if its a name and not a name token that they want.
     if ((fCharBuf[fCharIndex] >= 0xD800) && (fCharBuf[fCharIndex] <= 0xDB7F)) {
-        // make sure one more char is in the buffer, the transcoder
-        // should put only a complete surrogate pair into the buffer
-        assert(fCharIndex+1 < fCharsAvail);
+        // if there isn't one more char in the buffer, read more data
+        if (fCharIndex+1 == fCharsAvail)
+        {
+            if (!refreshCharBuffer())
+                return false;
+            // reset the start buffer to the new location of the cursor
+            charIndex_start = fCharIndex;
+        }
         if ((fCharBuf[fCharIndex+1] < 0xDC00) || (fCharBuf[fCharIndex+1] > 0xDFFF))
             return false;
 
@@ -758,7 +780,28 @@ bool XMLReader::getNCName(XMLBuffer& toF
         //  Check the current char and take it if it's a name char
         while(fCharIndex < fCharsAvail)
         {
-            if((fCharBuf[fCharIndex] >= 0xD800) && (fCharBuf[fCharIndex] <= 0xDB7F) && fCharIndex+1 < fCharsAvail && ((fCharBuf[fCharIndex+1] < 0xDC00) || (fCharBuf[fCharIndex+1] > 0xDFFF))) fCharIndex+=2;
+            if((fCharBuf[fCharIndex] >= 0xD800) && (fCharBuf[fCharIndex] <= 0xDB7F))
+            {
+                // if there isn't one more char in the buffer, read more data
+                if (fCharIndex+1 == fCharsAvail)
+                {
+                    // but first copy the accepted character(s), and update column
+                    if (fCharIndex != charIndex_start)
+                    {
+                        fCurCol += (XMLFileLoc)(fCharIndex - charIndex_start);
+                        toFill.append(&fCharBuf[charIndex_start], fCharIndex - charIndex_start);
+                    }
+
+                    if (!refreshCharBuffer())
+                        break;
+
+                    charIndex_start = fCharIndex;
+                }
+                if ( (fCharBuf[fCharIndex+1] < 0xDC00) ||
+                    (fCharBuf[fCharIndex+1] > 0xDFFF)  )
+                    break;
+                fCharIndex += 2;
+            }
             else if(isNCNameChar(fCharBuf[fCharIndex])) fCharIndex++;
             else break;
         }

Added: xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-2180/crash.xml
URL: http://svn.apache.org/viewvc/xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-2180/crash.xml?rev=1871620&view=auto
==============================================================================
Binary file - no diff available.

Propchange: xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-2180/crash.xml
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-2180/crash2.xml
URL: http://svn.apache.org/viewvc/xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-2180/crash2.xml?rev=1871620&view=auto
==============================================================================
Binary file - no diff available.

Propchange: xerces/c/trunk/tests/src/XSTSHarness/regression/XERCESC-2180/crash2.xml
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: xerces/c/trunk/tests/src/XSTSHarness/regression/XercesXML.testSet
URL: http://svn.apache.org/viewvc/xerces/c/trunk/tests/src/XSTSHarness/regression/XercesXML.testSet?rev=1871620&view=auto
==============================================================================
--- xerces/c/trunk/tests/src/XSTSHarness/regression/XercesXML.testSet (added)
+++ xerces/c/trunk/tests/src/XSTSHarness/regression/XercesXML.testSet Sun Dec 15 21:18:15 2019
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="utf-8"?>
+<TESTSUITE>
+  <TESTCASES xml:base="XERCESC-2180">
+    <!-- https://issues.apache.org/jira/browse/XERCESC-2180: Assertion when scanner splits a surrogate pair across two separate buffers -->
+    <TEST ID="XERCESC-2180" TYPE="invalid" URI="crash.xml"/>
+    <TEST ID="XERCESC-2180" TYPE="invalid" URI="crash2.xml"/>
+  </TESTCASES>
+</TESTSUITE>
\ No newline at end of file



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@xerces.apache.org
For additional commands, e-mail: commits-help@xerces.apache.org