You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by an...@locus.apache.org on 2000/07/07 03:08:45 UTC

cvs commit: xml-xerces/c/src/internal ReaderMgr.hpp XMLReader.cpp XMLReader.hpp XMLScanner2.cpp

andyh       00/07/06 18:08:45

  Modified:    c/src/internal ReaderMgr.hpp XMLReader.cpp XMLReader.hpp
                        XMLScanner2.cpp
  Log:
  Parser speed up in scan of XML content.
  
  Revision  Changes    Path
  1.10      +11 -0     xml-xerces/c/src/internal/ReaderMgr.hpp
  
  Index: ReaderMgr.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/internal/ReaderMgr.hpp,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- ReaderMgr.hpp	2000/03/02 19:54:29	1.9
  +++ ReaderMgr.hpp	2000/07/07 01:08:44	1.10
  @@ -56,6 +56,9 @@
   
   /*
    * $Log: ReaderMgr.hpp,v $
  + * Revision 1.10  2000/07/07 01:08:44  andyh
  + * Parser speed up in scan of XML content.
  + *
    * Revision 1.9  2000/03/02 19:54:29  roddey
    * This checkin includes many changes done while waiting for the
    * 1.1.0 code to be finished. I can't list them all here, but a list is
  @@ -159,6 +162,7 @@
       bool getNameToken(XMLBuffer& toFill);
       XMLCh getNextChar();
       bool getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten);
  +    bool getNextPlainContentChar(XMLCh& chGotten);
       void getSpaces(XMLBuffer& toFill);
       void getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck);
       bool isEmpty() const;
  @@ -331,6 +335,11 @@
   inline bool ReaderMgr::getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten)
   {
       return fCurReader->getNextCharIfNot(chNotToGet, chGotten);
  +}
  +
  +inline bool ReaderMgr::getNextPlainContentChar(XMLCh& chGotten)   // plain-content char fetch, manager level
  +{
  +    return fCurReader->getNextPlainContentChar(chGotten);   // pure delegation: result and chGotten come from the current reader
   }
   
   inline bool ReaderMgr::getThrowEOE() const
  
  
  
  1.17      +35 -0     xml-xerces/c/src/internal/XMLReader.cpp
  
  Index: XMLReader.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/internal/XMLReader.cpp,v
  retrieving revision 1.16
  retrieving revision 1.17
  diff -u -r1.16 -r1.17
  --- XMLReader.cpp	2000/07/06 21:00:51	1.16
  +++ XMLReader.cpp	2000/07/07 01:08:44	1.17
  @@ -56,6 +56,9 @@
   
   /*
    * $Log: XMLReader.cpp,v $
  + * Revision 1.17  2000/07/07 01:08:44  andyh
  + * Parser speed up in scan of XML content.
  + *
    * Revision 1.16  2000/07/06 21:00:51  jpolast
    * inlined getNextCharIfNot() for better performance
    *
  @@ -1815,6 +1818,38 @@
   // ---------------------------------------------------------------------------
   //  XMLReader: Static data
   // ---------------------------------------------------------------------------
  +
  +       //  Plain Content Characters.  These are legal content characters that need no
  +       //   special handling or checks at all.  They do not include CR, NL, <, & or ],
  +       //   nor anything else below 0x20: TAB is left to the general path, and the other
  +       //   control chars are not legal XML chars, so they must not bypass normal checks.
  +       //  This really wants to be merged into the main table below, although this
  +       //    would require some thought and reorganization.
  +       //
  +       //  Note:  This is generated by hand, not programmatically like
  +       //         fgCharCharsTable, below.
  +const XMLByte XMLReader::fgPlainContentChars[256] = 
  +{
  + //        0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F
  + /*0*/  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  + /*1*/  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  + /*2*/  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
  + /*3*/  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x01, 0x01, 0x01,
  + /*4*/  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 
  + /*5*/  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x01, 0x01, 
  + /*6*/  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 
  + /*7*/  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 
  + /*8*/  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 
  + /*9*/  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 
  + /*A*/  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 
  + /*B*/  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 
  + /*C*/  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 
  + /*D*/  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 
  + /*E*/  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 
  + /*F*/  0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01
  +};
  +
  +
   const XMLByte XMLReader::fgCharCharsTable[0x10000] =
   {
         0x1A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xD0, 0xD0, 0x00, 0x00, 0xD0, 0x00, 0x00
  
  
  
  1.11      +47 -1     xml-xerces/c/src/internal/XMLReader.hpp
  
  Index: XMLReader.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/internal/XMLReader.hpp,v
  retrieving revision 1.10
  retrieving revision 1.11
  diff -u -r1.10 -r1.11
  --- XMLReader.hpp	2000/07/06 21:00:52	1.10
  +++ XMLReader.hpp	2000/07/07 01:08:44	1.11
  @@ -56,6 +56,9 @@
   
   /*
    * $Log: XMLReader.hpp,v $
  + * Revision 1.11  2000/07/07 01:08:44  andyh
  + * Parser speed up in scan of XML content.
  + *
    * Revision 1.10  2000/07/06 21:00:52  jpolast
    * inlined getNextCharIfNot() for better performance
    *
  @@ -232,6 +235,7 @@
       bool getName(XMLBuffer& toFill, const bool token);
       bool getNextChar(XMLCh& chGotten);
       bool getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten);
  +    bool getNextPlainContentChar(XMLCh& chGotten);
       bool getSpaces(XMLBuffer& toFill);
       bool getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck);
       bool peekNextChar(XMLCh& chGotten);
  @@ -503,6 +507,16 @@
       //      code and then hard coded into the cpp file for speed.
       // -----------------------------------------------------------------------
       static const XMLByte    fgCharCharsTable[0x10000];
  +
  +    //------------------------------------------------------------------------
  +    //
  +    //  Another character property table, to speed the handling of plain character data.
  +    //
  +    //    ToDo:  Figure out an efficient way to merge with the main character
  +    //           properties table.
  +    //
  +    // -----------------------------------------------------------------------
  +    static const XMLByte    fgPlainContentChars[256];
   };
   
   
  @@ -634,10 +648,42 @@
   }
   
   
  +
   // ---------------------------------------------------------------------------
  -//  XMLReader: getNextCharIfNot() method inlined for speed
  +//
  +//  XMLReader: getNextPlainContentChar() method inlined for speed
  +//
   // ---------------------------------------------------------------------------
  +inline bool XMLReader::getNextPlainContentChar(XMLCh& chGotten)
  +{
  +    //
  +    //  Fast path only: if the buffer holds no unread char, just report
  +    //    failure.  The more general getNextChar routine will succeed
  +    //    after this one fails.
  +    //
  +    if (fCharIndex >= fCharsAvail)
  +    {
  +        return false;
  +    }
  +
  +    //  fgPlainContentChars has 256 entries, so any char up to and including 0xFF may index it.
  +    const XMLCh c = fCharBuf[fCharIndex];
  +    if (c <= 0xFF && XMLReader::fgPlainContentChars[c])
  +    {
  +        fCharIndex++;           // the char is consumed only when it is returned
  +        chGotten = c;
  +        return true;
  +    }
  +
  +    return false;               // chGotten is untouched and the char stays in the buffer
  +}
   
  +
  +
  +
  +// ---------------------------------------------------------------------------
  +//  XMLReader: getNextCharIfNot() method inlined for speed
  +// ---------------------------------------------------------------------------
   inline bool XMLReader::getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten)
   {
       //
  
  
  
  1.14      +14 -0     xml-xerces/c/src/internal/XMLScanner2.cpp
  
  Index: XMLScanner2.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/internal/XMLScanner2.cpp,v
  retrieving revision 1.13
  retrieving revision 1.14
  diff -u -r1.13 -r1.14
  --- XMLScanner2.cpp	2000/05/11 23:11:33	1.13
  +++ XMLScanner2.cpp	2000/07/07 01:08:44	1.14
  @@ -56,6 +56,9 @@
   
   /*
    * $Log: XMLScanner2.cpp,v $
  + * Revision 1.14  2000/07/07 01:08:44  andyh
  + * Parser speed up in scan of XML content.
  + *
    * Revision 1.13  2000/05/11 23:11:33  andyh
    * Add missing validity checks for stand-alone documents, character range
    * and Well-formed parsed entities.  Changes contributed by Sean MacRoibeaird
  @@ -1662,6 +1665,15 @@
               }
                else
               {
  +                 //  Eat through as many plain content characters as possible without
  +                 //  needing special handling
  +                 //
  +                if (curState == State_Waiting  &&  !gotLeadingSurrogate)
  +                {
  +                    while (fReaderMgr.getNextPlainContentChar(nextCh))
  +                        toUse.append(nextCh);
  +                }
  +
                   // Try to get another char from the source
                   if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
                   {