You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by kn...@apache.org on 2004/09/29 02:24:01 UTC

cvs commit: xml-xerces/c/src/xercesc/internal XMLReader.cpp XMLReader.hpp

knoaman     2004/09/28 17:24:01

  Modified:    c/src/xercesc/internal XMLReader.cpp XMLReader.hpp
  Log:
  Performance: improve src offset calculation. Patch by Anthony O'Dowd.
  
  Revision  Changes    Path
  1.25      +45 -8     xml-xerces/c/src/xercesc/internal/XMLReader.cpp
  
  Index: XMLReader.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLReader.cpp,v
  retrieving revision 1.24
  retrieving revision 1.25
  diff -u -r1.24 -r1.25
  --- XMLReader.cpp	8 Sep 2004 13:56:14 -0000	1.24
  +++ XMLReader.cpp	29 Sep 2004 00:24:01 -0000	1.25
  @@ -279,6 +279,7 @@
       {
           // This represents no data from the source
           fCharSizeBuf[fCharsAvail] = 0;
  +        fCharOfsBuf[fCharsAvail] = 0;
           fCharBuf[fCharsAvail++] = chSpace;
       }
   }
  @@ -374,6 +375,7 @@
       {
           // This represents no data from the source
           fCharSizeBuf[fCharsAvail] = 0;
  +        fCharOfsBuf[fCharsAvail] = 0;
           fCharBuf[fCharsAvail++] = chSpace;
       }
   }
  @@ -401,11 +403,15 @@
       //  Take the current source offset and add in the sizes that we've
       //  eaten from the source so far.
       //
  -    unsigned int offset = fSrcOfsBase;
  -    for (unsigned int index = 0; index < fCharIndex; index++)
  -        offset += fCharSizeBuf[index];
  +    if( fCharIndex == 0 ) {
  +        return fSrcOfsBase;
  +    }
  +
  +    if( fCharIndex < fCharsAvail ) {
  +        return (fSrcOfsBase + fCharOfsBuf[fCharIndex]);
  +    }
   
  -    return offset;
  +    return (fSrcOfsBase + fCharOfsBuf[fCharIndex-1] + fCharSizeBuf[fCharIndex-1]);
   }
   
   
  @@ -536,6 +542,17 @@
                       fCharIndex++;
                   }
               }
  +            // If there's a UTF-8 BOM (0xEF 0xBB 0xBF), skip past it.
  +            else {
  +                const char* asChars = (const char*)fRawByteBuf;
  +                if ((fRawBytesAvail > XMLRecognizer::fgUTF8BOMLen )&&
  +                    (XMLString::compareNString(  asChars
  +                    , XMLRecognizer::fgUTF8BOM
  +                    , XMLRecognizer::fgUTF8BOMLen) == 0) && !startInd)
  +                {
  +                    fCharIndex += XMLRecognizer::fgUTF8BOMLen;
  +                }
  +            }
           }
       }
   
  @@ -547,6 +564,15 @@
       if (!fCharsAvail)
           fNoMore = true;
   
  +    //  Calculate fCharOfsBuf using the elements from fCharSizeBuf
  +    if (fCalculateSrcOfs)
  +    {
  +        fCharOfsBuf[0] = 0;
  +        for (unsigned int index = 1; index < fCharsAvail; ++index) {
  +            fCharOfsBuf[index] = fCharOfsBuf[index-1]+fCharSizeBuf[index-1];
  +        }
  +    }
  +
       return (fCharsAvail != 0);
   }
   
  @@ -1263,11 +1289,13 @@
               if (fRawBytesAvail < 2)
                   break;
   
  +            unsigned int postBOMIndex = 0;
               const UTF16Ch* asUTF16 = (const UTF16Ch*)&fRawByteBuf[fRawBufIndex];
               if ((*asUTF16 == chUnicodeMarker) || (*asUTF16 == chSwappedUnicodeMarker))
               {
                   fRawBufIndex += sizeof(UTF16Ch);
                   asUTF16++;
  +                postBOMIndex = fRawBufIndex;
               }
   
               //  First check that there are enough raw bytes for there to even
  @@ -1275,7 +1303,7 @@
               //
               if (fRawBytesAvail - fRawBufIndex < XMLRecognizer::fgUTF16PreLen)
               {
  -                fRawBufIndex = 0;
  +                fRawBufIndex = postBOMIndex;
                   break;
               }
   
  @@ -1287,7 +1315,7 @@
               {
                   if (memcmp(asUTF16, XMLRecognizer::fgUTF16BPre, XMLRecognizer::fgUTF16PreLen))
                   {
  -                    fRawBufIndex = 0;
  +                    fRawBufIndex = postBOMIndex;
                       break;
                   }
               }
  @@ -1295,7 +1323,7 @@
               {
                   if (memcmp(asUTF16, XMLRecognizer::fgUTF16LPre, XMLRecognizer::fgUTF16PreLen))
                   {
  -                    fRawBufIndex = 0;
  +                    fRawBufIndex = postBOMIndex;
                       break;
                   }
               }
  @@ -1372,6 +1400,15 @@
       //
       if ((fType == Type_PE) && (fRefFrom == RefFrom_NonLiteral))
           fCharBuf[fCharsAvail++] = chSpace;
  +    
  +    //  Calculate fCharOfsBuf buffer using the elements from fCharSizeBuf
  +    if (fCalculateSrcOfs)
  +    {
  +        fCharOfsBuf[0] = 0;
  +        for (unsigned int index = 1; index < fCharsAvail; ++index) {
  +            fCharOfsBuf[index] = fCharOfsBuf[index-1]+fCharSizeBuf[index-1];
  +        }
  +    }
   }
   
   
  
  
  
  1.20      +9 -0      xml-xerces/c/src/xercesc/internal/XMLReader.hpp
  
  Index: XMLReader.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLReader.hpp,v
  retrieving revision 1.19
  retrieving revision 1.20
  diff -u -r1.19 -r1.20
  --- XMLReader.hpp	8 Sep 2004 13:56:14 -0000	1.19
  +++ XMLReader.hpp	29 Sep 2004 00:24:01 -0000	1.20
  @@ -16,6 +16,9 @@
   
   /*
    * $Log$
  + * Revision 1.20  2004/09/29 00:24:01  knoaman
  + * Performance: improve src offset calculation. Patch by Anthony O'Dowd.
  + *
    * Revision 1.19  2004/09/08 13:56:14  peiyongz
    * Apache License Version 2.0
    *
  @@ -424,6 +427,11 @@
       //      to make the internalized char fCharBuf[x]. This only contains
       //      useful data if fSrcOfsSupported is true.
       //
  +    //  fCharOfsBuf
  +    //      This buffer is an array that contains the offset in the
  +    //      fRawByteBuf buffer of each char in the fCharBuf buffer. It
  +    //      only contains useful data if fSrcOfsSupported is true.
  +    //
       //  fCurCol
       //  fCurLine
       //      The current line and column that we are in within this reader's
  @@ -548,6 +556,7 @@
       XMLCh                       fCharBuf[kCharBufSize];
       unsigned int                fCharsAvail;
       unsigned char               fCharSizeBuf[kCharBufSize];
  +    unsigned int                fCharOfsBuf[kCharBufSize];
       XMLSSize_t                  fCurCol;
       XMLSSize_t                  fCurLine;
       XMLRecognizer::Encodings    fEncoding;
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org