You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by tn...@apache.org on 2002/11/25 22:31:09 UTC

cvs commit: xml-xerces/c/src/xercesc/internal ReaderMgr.cpp XMLReader.cpp XMLReader.hpp

tng         2002/11/25 13:31:08

  Modified:    c/src/xercesc/internal ReaderMgr.cpp XMLReader.cpp
                        XMLReader.hpp
  Log:
  Performance:
  1. Use the XMLRecognizer::Encodings enum to make the new transcoder, which is faster than comparing the encoding string every time.
  2. Pre-uppercase the encoding string before calling encodingForName, to avoid calling compareIString.
  
  Revision  Changes    Path
  1.7       +2 -2      xml-xerces/c/src/xercesc/internal/ReaderMgr.cpp
  
  Index: ReaderMgr.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/ReaderMgr.cpp,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- ReaderMgr.cpp	4 Nov 2002 14:58:18 -0000	1.6
  +++ ReaderMgr.cpp	25 Nov 2002 21:31:08 -0000	1.7
  @@ -712,7 +712,7 @@
           sysId
           , 0
           , newStream
  -        , XMLUni::fgXMLChEncodingString
  +        , XMLRecognizer::XERCES_XMLCH
           , refFrom
           , type
           , XMLReader::Source_Internal
  
  
  
  1.6       +163 -42   xml-xerces/c/src/xercesc/internal/XMLReader.cpp
  
  Index: XMLReader.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLReader.cpp,v
  retrieving revision 1.5
  retrieving revision 1.6
  diff -u -r1.5 -r1.6
  --- XMLReader.cpp	4 Nov 2002 14:58:18 -0000	1.5
  +++ XMLReader.cpp	25 Nov 2002 21:31:08 -0000	1.6
  @@ -274,6 +274,7 @@
   
       // Copy the encoding string to our member
       fEncodingStr = XMLString::replicate(encodingStr);
  +    XMLString::upperCase(fEncodingStr);
   
       // Ask the transcoding service if it supports src offset info
       fSrcOfsSupported = XMLPlatformUtils::fgTransService->supportsSrcOfs();
  @@ -293,9 +294,116 @@
       //  forced, this will be the one we will use, period.
       //
       XMLTransService::Codes failReason;
  +    if (fEncoding == XMLRecognizer::OtherEncoding)
  +    {
  +        //
  +        //  fEncodingStr not  pre-recognized, use it
  +        //  directly for transcoder
  +        //
  +        fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
  +        (
  +            fEncodingStr
  +            , failReason
  +            , kCharBufSize
  +        );
  +    }
  +     else
  +    {
  +        //
  +        //  Use the recognized fEncoding to create the transcoder
  +        //
  +        fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
  +        (
  +            fEncoding
  +            , failReason
  +            , kCharBufSize
  +        );
  +
  +    }
  +
  +    if (!fTranscoder)
  +    {
  +        ThrowXML1
  +        (
  +            TranscodingException
  +            , XMLExcepts::Trans_CantCreateCvtrFor
  +            , fEncodingStr
  +        );
  +    }
  +
  +    //
  +    //  Note that, unlike above, we do not do an initial decode of the
  +    //  first line. We take the caller's word that the encoding is correct
  +    //  and just assume that the first bulk decode (kicked off by the first
  +    //  get of a character) will work.
  +    //
  +    //  So we do here the slipping in of the leading space if required.
  +    //
  +    if ((fType == Type_PE) && (fRefFrom == RefFrom_NonLiteral))
  +    {
  +        // This represents no data from the source
  +        fCharSizeBuf[fCharsAvail] = 0;
  +        fCharBuf[fCharsAvail++] = chSpace;
  +    }
  +}
  +
  +
  +XMLReader::XMLReader(const  XMLCh* const            pubId
  +                    , const XMLCh* const            sysId
  +                    ,       BinInputStream* const   streamToAdopt
  +                    , XMLRecognizer::Encodings      encodingEnum
  +                    , const RefFrom                 from
  +                    , const Types                   type
  +                    , const Sources                 source
  +                    , const bool                    throwAtEnd) :
  +    fCharIndex(0)
  +    , fCharsAvail(0)
  +    , fCurCol(1)
  +    , fCurLine(1)
  +    , fEncoding(XMLRecognizer::UTF_8)
  +    , fEncodingStr(0)
  +    , fForcedEncoding(true)
  +    , fNoMore(false)
  +    , fPublicId(XMLString::replicate(pubId))
  +    , fRawBufIndex(0)
  +    , fRawBytesAvail(0)
  +    , fReaderNum(0xFFFFFFFF)
  +    , fRefFrom(from)
  +    , fSentTrailingSpace(false)
  +    , fSource(source)
  +    , fSpareCh(0)
  +    , fSrcOfsBase(0)
  +    , fSrcOfsSupported(false)
  +    , fStream(streamToAdopt)
  +    , fSystemId(XMLString::replicate(sysId))
  +    , fSwapped(false)
  +    , fThrowAtEnd(throwAtEnd)
  +    , fTranscoder(0)
  +    , fType(type)
  +{
  +    // Do an initial load of raw bytes
  +    refreshRawBuffer();
  +
  +    // Ask the transcoding service if it supports src offset info
  +    fSrcOfsSupported = XMLPlatformUtils::fgTransService->supportsSrcOfs();
  +
  +    //
  +    //  Use the passed encoding code
  +    //
  +    fEncoding = encodingEnum;
  +    fEncodingStr = XMLString::replicate(XMLRecognizer::nameForEncoding(fEncoding));
  +
  +    // Check whether the fSwapped flag should be set or not
  +    checkForSwapped();
  +
  +    //
  +    //  Create a transcoder for the encoding. Since the encoding has been
  +    //  forced, this will be the one we will use, period.
  +    //
  +    XMLTransService::Codes failReason;
       fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
       (
  -        fEncodingStr
  +        fEncoding
           , failReason
           , kCharBufSize
       );
  @@ -1136,13 +1244,11 @@
       if (fForcedEncoding)
           return true;
   
  -    // Clean up the old encoding string
  -	// Do not delete until we know we have a good encoding
  -//	if (fEncodingStr)
  -//	{
  -//		delete [] fEncodingStr;
  -//		fEncodingStr = 0;
  -//	}
  +    //
  +    // upperCase the newEncoding first for better performance
  +    //
  +    XMLCh* inputEncoding = XMLString::replicate(newEncoding);
  +    XMLString::upperCase(inputEncoding);
   
       //
       //  Try to map the string to one of our standard encodings. If its not
  @@ -1152,7 +1258,7 @@
       //
       XMLRecognizer::Encodings newBaseEncoding = XMLRecognizer::encodingForName
       (
  -        newEncoding
  +        inputEncoding
       );
   
       //
  @@ -1166,12 +1272,14 @@
           //  are already in one of the endian versions of those encodings,
           //  then just keep it and go on. Otherwise, its not valid.
           //
  -        if (!XMLString::compareIString(newEncoding, XMLUni::fgUTF16EncodingString)
  -        ||  !XMLString::compareIString(newEncoding, XMLUni::fgUTF16EncodingString2)
  -        ||  !XMLString::compareIString(newEncoding, XMLUni::fgUTF16EncodingString3)
  -        ||  !XMLString::compareIString(newEncoding, XMLUni::fgUTF16EncodingString4)
  -        ||  !XMLString::compareIString(newEncoding, XMLUni::fgUTF16EncodingString5))
  +        if (!XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString)
  +        ||  !XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString2)
  +        ||  !XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString3)
  +        ||  !XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString4)
  +        ||  !XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString5))
           {
  +            delete [] inputEncoding;
  +
               if ((fEncoding != XMLRecognizer::UTF_16L)
               &&  (fEncoding != XMLRecognizer::UTF_16B))
               {
  @@ -1182,20 +1290,20 @@
               newBaseEncoding = fEncoding;
   
               if (fEncoding == XMLRecognizer::UTF_16L) {
  -
  -				delete [] fEncodingStr;
  +                delete [] fEncodingStr;
                   fEncodingStr = XMLString::replicate(XMLUni::fgUTF16LEncodingString);
  -			}
  +            }
               else {
  -
  -				delete [] fEncodingStr;
  +                delete [] fEncodingStr;
                   fEncodingStr = XMLString::replicate(XMLUni::fgUTF16BEncodingString);
  -			}
  +            }
           }
  -         else if (!XMLString::compareIString(newEncoding, XMLUni::fgUCS4EncodingString)
  -              ||  !XMLString::compareIString(newEncoding, XMLUni::fgUCS4EncodingString2)
  -              ||  !XMLString::compareIString(newEncoding, XMLUni::fgUCS4EncodingString3))
  +        else if (!XMLString::compareString(inputEncoding, XMLUni::fgUCS4EncodingString)
  +             ||  !XMLString::compareString(inputEncoding, XMLUni::fgUCS4EncodingString2)
  +             ||  !XMLString::compareString(inputEncoding, XMLUni::fgUCS4EncodingString3))
           {
  +            delete [] inputEncoding;
  +
               if ((fEncoding != XMLRecognizer::UCS_4L)
               &&  (fEncoding != XMLRecognizer::UCS_4B))
               {
  @@ -1209,42 +1317,55 @@
   
                   delete [] fEncodingStr;
                   fEncodingStr = XMLString::replicate(XMLUni::fgUCS4LEncodingString);
  -			}
  +            }
               else {
   
                   delete [] fEncodingStr;
                   fEncodingStr = XMLString::replicate(XMLUni::fgUCS4BEncodingString);
  -			}
  +            }
           }
            else
           {
  +            //
               // None of those special cases, so just replicate the new name
  +            // and use it directly to create the transcoder
  +            //
               delete [] fEncodingStr;
  -            fEncodingStr = XMLString::replicate(newEncoding);
  +            fEncodingStr = inputEncoding;
  +
  +            XMLTransService::Codes failReason;
  +            fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
  +            (
  +                fEncodingStr
  +                , failReason
  +                , kCharBufSize
  +            );
           }
       }
        else
       {
           // Store the new encoding string since it is just an intrinsic
           delete [] fEncodingStr;
  -        fEncodingStr = XMLString::replicate(newEncoding);
  +        fEncodingStr = inputEncoding;
       }
   
  -    //
  -    //  Now we can create a transcoder using the transcoding service. We
  -    //  might get back a transcoder for an intrinsically supported encoding,
  -    //  or we might get one from the underlying transcoding service.
  -    //
  -    XMLTransService::Codes failReason;
  -    fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
  -    (
  -        fEncodingStr
  -        , failReason
  -        , kCharBufSize
  -    );
  +    if (!fTranscoder) {
  +        //
  +        //  Now we can create a transcoder using the recognized fEncoding.  We
  +        //  might get back a transcoder for an intrinsically supported encoding,
  +        //  or we might get one from the underlying transcoding service.
  +        //
  +        XMLTransService::Codes failReason;
  +        fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
  +        (
  +            newBaseEncoding
  +            , failReason
  +            , kCharBufSize
  +        );
   
  -    if (!fTranscoder)
  -        ThrowXML1(TranscodingException, XMLExcepts::Trans_CantCreateCvtrFor, fEncodingStr);
  +        if (!fTranscoder)
  +            ThrowXML1(TranscodingException, XMLExcepts::Trans_CantCreateCvtrFor, fEncodingStr);
  +    }
   
       // Update the base encoding member with the new base encoding found
       fEncoding = newBaseEncoding;
  
  
  
  1.5       +15 -0     xml-xerces/c/src/xercesc/internal/XMLReader.hpp
  
  Index: XMLReader.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLReader.hpp,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- XMLReader.hpp	4 Nov 2002 14:58:19 -0000	1.4
  +++ XMLReader.hpp	25 Nov 2002 21:31:08 -0000	1.5
  @@ -56,6 +56,11 @@
   
   /*
    * $Log$
  + * Revision 1.5  2002/11/25 21:31:08  tng
  + * Performance:
  + * 1. use XMLRecognizer::Encodings enum to make new transcode, faster than comparing the encoding string every time.
  + * 2. Pre uppercase the encodingString before calling encodingForName to avoid calling compareIString
  + *
    * Revision 1.4  2002/11/04 14:58:19  tng
    * C++ Namespace Support.
    *
  @@ -256,6 +261,18 @@
           , const XMLCh* const                sysId
           ,       BinInputStream* const       streamToAdopt
           , const XMLCh* const                encodingStr
  +        , const RefFrom                     from
  +        , const Types                       type
  +        , const Sources                     source
  +        , const bool                        throwAtEnd = false
  +    );
  +
  +    XMLReader
  +    (
  +        const   XMLCh* const                pubId
  +        , const XMLCh* const                sysId
  +        ,       BinInputStream* const       streamToAdopt
  +        , XMLRecognizer::Encodings          encodingEnum
           , const RefFrom                     from
           , const Types                       type
           , const Sources                     source
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org