You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by ro...@locus.apache.org on 2000/01/25 02:04:22 UTC

cvs commit: xml-xerces/c/src/internal ReaderMgr.cpp ReaderMgr.hpp XMLReader.cpp XMLReader.hpp XMLScanner2.cpp

roddey      00/01/24 17:04:22

  Modified:    c/src/internal ReaderMgr.cpp ReaderMgr.hpp XMLReader.cpp
                        XMLReader.hpp XMLScanner2.cpp
  Log:
  Fixes a bogus error about ]]> in char data.
  
  Revision  Changes    Path
  1.6       +6 -32     xml-xerces/c/src/internal/ReaderMgr.cpp
  
  Index: ReaderMgr.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/internal/ReaderMgr.cpp,v
  retrieving revision 1.5
  retrieving revision 1.6
  diff -u -r1.5 -r1.6
  --- ReaderMgr.cpp	2000/01/15 01:26:16	1.5
  +++ ReaderMgr.cpp	2000/01/25 01:04:21	1.6
  @@ -56,6 +56,9 @@
   
   /**
    * $Log: ReaderMgr.cpp,v $
  + * Revision 1.6  2000/01/25 01:04:21  roddey
  + * Fixes a bogus error about ]]> in char data.
  + *
    * Revision 1.5  2000/01/15 01:26:16  rahulj
    * Added support for HTTP to the parser using libWWW 5.2.8.
    * Renamed URL.[ch]pp to XMLURL.[ch]pp and like wise for the class name.
  @@ -149,38 +152,6 @@
   // ---------------------------------------------------------------------------
   //  ReaderMgr: Scanning APIs
   // ---------------------------------------------------------------------------
  -XMLCh ReaderMgr::getCharData(XMLBuffer&     toFill
  -                            , XMLScanner&   owningScanner
  -                            , bool&         gotLeadingSurrogate)
  -{
  -    //
  -    //  NOTE:   We DO NOT reset the buffer here. This is an accumulation
  -    //          method that will be called multiple times in some cases to
  -    //          get all the contiguous char data.
  -    //
  -    //  Ok, so enter the loop and get char data until we can't go no more.
  -    //
  -    XMLCh breakCh = chNull;
  -    while (true)
  -    {
  -        //
  -        //  Ask the current reader for all he's got. He will return the
  -        //  break character that caused him to break out. If its null, then
  -        //  it just means he has no more data, so we can pop a reader and
  -        //  keep going. Otherwise, we return with that break char.
  -        //
  -        breakCh = fCurReader->getCharData(toFill, owningScanner, gotLeadingSurrogate);
  -        if (breakCh)
  -            break;
  -
  -        // If we cannot pop a reader, then just break out with the null char
  -        if (!popReader())
  -            break;
  -    }
  -    return breakCh;
  -}
  -
  -
   XMLCh ReaderMgr::getNextChar()
   {
       XMLCh chRet;
  
  
  
  1.5       +11 -6     xml-xerces/c/src/internal/ReaderMgr.hpp
  
  Index: ReaderMgr.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/internal/ReaderMgr.hpp,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- ReaderMgr.hpp	2000/01/24 20:40:43	1.4
  +++ ReaderMgr.hpp	2000/01/25 01:04:21	1.5
  @@ -56,6 +56,9 @@
   
   /**
    * $Log: ReaderMgr.hpp,v $
  + * Revision 1.5  2000/01/25 01:04:21  roddey
  + * Fixes a bogus error about ]]> in char data.
  + *
    * Revision 1.4  2000/01/24 20:40:43  roddey
    * Exposed the APIs to get to the byte offset in the source XML buffer. This stuff
    * is not tested yet, but I wanted to get the API changes in now so that the API
  @@ -137,15 +140,10 @@
       //  character spooling methods.
       // -----------------------------------------------------------------------
       bool atEOF() const;
  -    XMLCh getCharData
  -    (
  -        XMLBuffer&      toFill
  -        , XMLScanner&   owningScanner
  -        , bool&         gotLeadingSurrogate
  -    );
       bool getName(XMLBuffer& toFill);
       bool getNameToken(XMLBuffer& toFill);
       XMLCh getNextChar();
  +    bool getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten);
       void getSpaces(XMLBuffer& toFill);
       void getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck);
       bool isEmpty() const;
  @@ -303,6 +301,11 @@
   {
       toFill.reset();
       return fCurReader->getName(toFill, true);
  +}
  +
  +inline bool ReaderMgr::getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten)
  +{
  +    return fCurReader->getNextCharIfNot(chNotToGet, chGotten);
   }
   
   inline bool ReaderMgr::getThrowEOE() const
  
  
  
  1.6       +101 -139  xml-xerces/c/src/internal/XMLReader.cpp
  
  Index: XMLReader.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/internal/XMLReader.cpp,v
  retrieving revision 1.5
  retrieving revision 1.6
  diff -u -r1.5 -r1.6
  --- XMLReader.cpp	2000/01/22 00:01:07	1.5
  +++ XMLReader.cpp	2000/01/25 01:04:21	1.6
  @@ -56,6 +56,9 @@
   
   /**
    * $Log: XMLReader.cpp,v $
  + * Revision 1.6  2000/01/25 01:04:21  roddey
  + * Fixes a bogus error about ]]> in char data.
  + *
    * Revision 1.5  2000/01/22 00:01:07  roddey
    * Simple change to get rid of two hard coded 'x' type characters, which won't
    * work on EBCDIC systems.
  @@ -487,139 +490,6 @@
   // ---------------------------------------------------------------------------
   //  XMLReader: Scanning methods
   // ---------------------------------------------------------------------------
  -XMLCh XMLReader::getCharData(   XMLBuffer&      toFill
  -                                , XMLScanner&   owningScanner
  -                                , bool&         gotLeadingSurrogate)
  -{
  -    //
  -    //  Ok, lets loop through the chars in the buffer until we eat them up
  -    //  and then reload and try again.
  -    //
  -    while (true)
  -    {
  -        while (fCharIndex < fCharsAvail)
  -        {
  -            // Peek the next char from the buffer, but don't eat it yet
  -            XMLCh nextCh = fCharBuf[fCharIndex];
  -
  -            //
  -            //  Ok, lets check whether its a special character data char.
  -            //  If not, then we handle it here. Else we break out and let
  -            //  the caller handle it.
  -            //
  -            if (!XMLReader::isSpecialCharDataChar(nextCh))
  -            {
  -                // Eat this one
  -                fCharIndex++;
  -
  -                // Deal with surrogate pairs
  -                if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
  -                {
  -                    //
  -                    //  Its a leading surrogate. If we already got one, then
  -                    //  issue an error, else set leading flag to make sure that
  -                    //  we look for a trailing next time.
  -                    //
  -                    if (gotLeadingSurrogate)
  -                        owningScanner.emitError(XML4CErrs::Expected2ndSurrogateChar);
  -                    else
  -                        gotLeadingSurrogate = true;
  -                }
  -                 else
  -                {
  -                    //
  -                    //  If its a trailing surrogate, make sure that we are
  -                    //  prepared for that. Else, its just a regular char so make
  -                    //  sure that we were not expected a trailing surrogate.
  -                    //
  -                    if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
  -                    {
  -                        // Its trailing, so make sure we were expecting it
  -                        if (!gotLeadingSurrogate)
  -                            owningScanner.emitError(XML4CErrs::Unexpected2ndSurrogateChar);
  -                    }
  -                     else
  -                    {
  -                        //
  -                        //  Its just a char, so make sure we were not expecting a
  -                        //  trailing surrogate.
  -                        //
  -                        if (gotLeadingSurrogate)
  -                            owningScanner.emitError(XML4CErrs::Expected2ndSurrogateChar);
  -
  -                        // Check for valid characters here
  -                        if (!XMLReader::isXMLChar(nextCh))
  -                        {
  -                            XMLCh tmpBuf[9];
  -                            XMLString::binToText
  -                            (
  -                                nextCh
  -                                , tmpBuf
  -                                , 8
  -                                , 16
  -                            );
  -                            owningScanner.emitError(XML4CErrs::InvalidCharacter, tmpBuf);
  -                        }
  -                    }
  -                    gotLeadingSurrogate = false;
  -                }
  -
  -                //
  -                //  Keep the line and column info up to date and do new
  -                //  line normalization.
  -                //
  -                if (nextCh == chCR)
  -                {
  -                    fCurLine++;
  -                    fCurCol = 1;
  -
  -                    if (fSource == Source_External)
  -                    {
  -                        if (fCharIndex < fCharsAvail)
  -                        {
  -                            if (fCharBuf[fCharIndex] == chLF)
  -                                fCharIndex++;
  -                        }
  -                         else
  -                        {
  -                            if (refreshCharBuffer())
  -                            {
  -                                if (fCharBuf[fCharIndex] == chLF)
  -                                    fCharIndex++;
  -                            }
  -                        }
  -                        nextCh = chLF;
  -                    }
  -                }
  -                 else if (nextCh == chLF)
  -                {
  -                    fCurLine++;
  -                    fCurCol = 1;
  -                }
  -                 else
  -                {
  -                    fCurCol++;
  -                }
  -
  -                // Add the (possibly normalized) char to the buffer and go again
  -                toFill.append(nextCh);
  -            }
  -             else
  -            {
  -                return nextCh;
  -            }
  -        }
  -
  -        // We've used up the current buffer so try to get more
  -        if (!refreshCharBuffer())
  -            break;
  -    }
  -
  -    // We ate up all this reader's data
  -    return chNull;
  -}
  -
  -
   bool XMLReader::getName(XMLBuffer& toFill, const bool token)
   {
       //
  @@ -694,25 +564,116 @@
       {
           // If fNoMore is set, then we have nothing else to give
           if (fNoMore)
  -        {
  -            chGotten = XMLCh(0);
               return false;
  -        }
   
           // If the buffer is empty, then try to refresh
           if (fCharIndex == fCharsAvail)
           {
               if (!refreshCharBuffer())
               {
  -                // If still empty, then return a zero char
  +                // If still empty, then return false
                   if (fCharIndex == fCharsAvail)
  -                {
  -                    chGotten = XMLCh(0);
                       return false;
  +            }
  +        }
  +
  +        chGotten = fCharBuf[fCharIndex++];
  +    }
  +
  +    // Handle end of line normalization and line/col member maintenance.
  +    if (chGotten == chCR)
  +    {
  +        //
  +        //  Do the normalization. We return chLF regardless of which was
  +        //  found. We also eat a chCR followed by an chLF.
  +        //
  +        //  We only do this if the content being spooled is not already
  +        //  internalized.
  +        //
  +        if (fSource == Source_External)
  +        {
  +            //
  +            //  See if we have another char left. If not, don't bother.
  +            //  Else, see if its an chLF to eat. If it is, bump the
  +            //  index again.
  +            //
  +            if (fCharIndex < fCharsAvail)
  +            {
  +                if (fCharBuf[fCharIndex] == chLF)
  +                    fCharIndex++;
  +            }
  +             else
  +            {
  +                if (refreshCharBuffer())
  +                {
  +                    if (fCharBuf[fCharIndex] == chLF)
  +                        fCharIndex++;
                   }
               }
  +
  +            // And return just an chLF
  +            chGotten = chLF;
  +        }
  +
  +        // And handle the line/col stuff
  +        fCurCol = 1;
  +        fCurLine++;
  +    }
  +     else if (chGotten == chLF)
  +    {
  +        fCurLine++;
  +        fCurCol = 1;
  +    }
  +     else if (chGotten)
  +    {
  +        //
  +        //  Only do this if it's not a null char. Null chars are not part of the
  +        //  real content. They are just marker characters inserted into
  +        //  the stream.
  +        //
  +        fCurCol++;
  +    }
  +    return true;
  +}
  +
  +
  +bool XMLReader::getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten)
  +{
  +    //
  +    //  See if there is at least a char in the buffer. Else, do the buffer
  +    //  reload logic.
  +    //
  +    if (fCharIndex < fCharsAvail)
  +    {
  +        // Check the next char
  +        if (fCharBuf[fCharIndex] == chNotToGet)
  +            return false;
  +
  +        // It's not the char we must leave unread, so bump the index past it
  +        chGotten = fCharBuf[fCharIndex++];
  +    }
  +     else
  +    {
  +        // If fNoMore is set, then we have nothing else to give
  +        if (fNoMore)
  +            return false;
  +
  +        // If the buffer is empty, then try to refresh
  +        if (fCharIndex == fCharsAvail)
  +        {
  +            if (!refreshCharBuffer())
  +            {
  +                // If still empty, then return false
  +                if (fCharIndex == fCharsAvail)
  +                    return false;
  +            }
           }
   
  +        // Check the next char
  +        if (fCharBuf[fCharIndex] == chNotToGet)
  +            return false;
  +
  +        // It's not the char we must leave unread, so bump the index past it
           chGotten = fCharBuf[fCharIndex++];
       }
   
  
  
  
  1.5       +5 -6      xml-xerces/c/src/internal/XMLReader.hpp
  
  Index: XMLReader.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/internal/XMLReader.hpp,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- XMLReader.hpp	2000/01/22 00:01:08	1.4
  +++ XMLReader.hpp	2000/01/25 01:04:21	1.5
  @@ -56,6 +56,9 @@
   
   /**
    * $Log: XMLReader.hpp,v $
  + * Revision 1.5  2000/01/25 01:04:21  roddey
  + * Fixes a bogus error about ]]> in char data.
  + *
    * Revision 1.4  2000/01/22 00:01:08  roddey
    * Simple change to get rid of two hard coded 'x' type characters, which won't
    * work on EBCDIC systems.
  @@ -199,14 +202,9 @@
       // -----------------------------------------------------------------------
       //  Scanning methods
       // -----------------------------------------------------------------------
  -    XMLCh getCharData
  -    (
  -        XMLBuffer&      toFill
  -        , XMLScanner&   owningScanner
  -        , bool&         gotLeadingSurrogate
  -    );
       bool getName(XMLBuffer& toFill, const bool token);
       bool getNextChar(XMLCh& chGotten);
  +    bool getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten);
       bool getSpaces(XMLBuffer& toFill);
       bool getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck);
       bool peekNextChar(XMLCh& chGotten);
  
  
  
  1.7       +8 -20     xml-xerces/c/src/internal/XMLScanner2.cpp
  
  Index: XMLScanner2.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/internal/XMLScanner2.cpp,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- XMLScanner2.cpp	2000/01/15 01:26:16	1.6
  +++ XMLScanner2.cpp	2000/01/25 01:04:21	1.7
  @@ -56,6 +56,9 @@
   
   /**
    * $Log: XMLScanner2.cpp,v $
  + * Revision 1.7  2000/01/25 01:04:21  roddey
  + * Fixes a bogus error about ]]> in char data.
  + *
    * Revision 1.6  2000/01/15 01:26:16  rahulj
    * Added support for HTTP to the parser using libWWW 5.2.8.
    * Renamed URL.[ch]pp to XMLURL.[ch]pp and like wise for the class name.
  @@ -1601,11 +1604,6 @@
       {
           while (true)
           {
  -            //
  -            //  Ok, lets get char data from the the readers until we hit
  -            //  a special char. It returns the char that made us break out
  -            //  of the loop. Use any second char first.
  -            //
               if (secondCh)
               {
                   nextCh = secondCh;
  @@ -1613,18 +1611,8 @@
               }
                else
               {
  -                // Reset the surrogate flag and get another block of chars
  -                nextCh = fReaderMgr.getCharData(toUse, *this, gotLeadingSurrogate);
  -
  -                //
  -                //  If we get an EOF, that's not really expected here, but it
  -                //  could happen if there was effectively no content, so just
  -                //  return.
  -                //
  -                //  Also, if we get the open angle of some markup, then break
  -                //  out.
  -                //
  -                if ((nextCh == chOpenAngle) || !nextCh)
  +                // Try to get another char from the source
  +                if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
                   {
                       // If we were waiting for a trailing surrogate, its an error
                       if (gotLeadingSurrogate)
  @@ -1633,9 +1621,6 @@
                       notDone = false;
                       break;
                   }
  -
  -                // Get the break char out of the input buffer now
  -                fReaderMgr.getNextChar();
               }
   
               //