You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by tn...@apache.org on 2002/05/30 18:20:57 UTC

cvs commit: xml-xerces/c/src/xercesc/internal XMLScanner.cpp XMLScanner.hpp

tng         02/05/30 09:20:57

  Modified:    c/src/xercesc/internal XMLScanner.cpp XMLScanner.hpp
  Log:
  Add feature to optionally ignore external DTD.
  
  Revision  Changes    Path
  1.10      +300 -23   xml-xerces/c/src/xercesc/internal/XMLScanner.cpp
  
  Index: XMLScanner.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.cpp,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- XMLScanner.cpp	28 May 2002 20:42:38 -0000	1.9
  +++ XMLScanner.cpp	30 May 2002 16:20:57 -0000	1.10
  @@ -55,7 +55,7 @@
    */
   
   /*
  - * $Id: XMLScanner.cpp,v 1.9 2002/05/28 20:42:38 tng Exp $
  + * $Id: XMLScanner.cpp,v 1.10 2002/05/30 16:20:57 tng Exp $
    */
   
   
  @@ -221,6 +221,7 @@
       , fRootElemName(0)
       , fExternalSchemaLocation(0)
       , fExternalNoNamespaceSchemaLocation(0)
  +    , fLoadExternalDTD(true)
   {
      commonInit();
   
  @@ -284,6 +285,7 @@
       , fRootElemName(0)
       , fExternalSchemaLocation(0)
       , fExternalNoNamespaceSchemaLocation(0)
  +    , fLoadExternalDTD(true)
   {
      commonInit();
   
  @@ -2215,28 +2217,7 @@
                   }
                    else if (fReaderMgr.skippedString(XMLUni::fgDocTypeString))
                   {
  -                    if (!fReuseGrammar && fValidatorFromUser && !fValidator->handlesDTD())
  -                    {
  -                        ThrowXML(RuntimeException, XMLExcepts::Gen_NoDTDValidator);
  -                    }
  -
  -                    //
  -                    //  We have a doc type. So, create a DTDScanner and
  -                    //  switch the Grammar to the emptyNamespace one.
  -                    //
  -
  -                    if (!switchGrammar(XMLUni::fgZeroLenString) && fValidate)
  -                    {
  -                        fValidator->emitError
  -                        (
  -                            XMLValid::GrammarNotFound
  -                          , XMLUni::fgZeroLenString
  -                        );
  -                    }
  -
  -                    DTDScanner fDTDScanner((DTDGrammar*)fGrammar, fEntityDeclPool, fDocTypeHandler);
  -                    fDTDScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);
  -                    fDTDScanner.scanDocTypeDecl(fReuseGrammar);
  +                    scanDocTypeDecl();
   
                       // if reusing grammar, this has been validated already in first scan
                       // skip for performance
  @@ -2294,6 +2275,302 @@
               , "in prolog"
           );
       }
  +    }
  +}
  +
  +//
  +//  This method handles the high level logic of scanning the DOCType
  +//  declaration. This calls the DTDScanner and kicks off both the scanning of
  +//  the internal subset and the scanning of the external subset, if any.
  +//
  +//  When we get here the '<!DOCTYPE' part has already been scanned, which is
  +//  what told us that we had a doc type decl to parse.
  +//
  +
  +void XMLScanner::scanDocTypeDecl()
  +{
  +    if (!fReuseGrammar && fValidatorFromUser && !fValidator->handlesDTD())
  +    {
  +        ThrowXML(RuntimeException, XMLExcepts::Gen_NoDTDValidator);
  +    }
  +
  +    //
  +    //  We have a doc type. So, create a DTDScanner and
  +    //  switch the Grammar to the emptyNamespace one.
  +    //
  +
  +    if (!switchGrammar(XMLUni::fgZeroLenString) && fValidate)
  +    {
  +        fValidator->emitError
  +        (
  +            XMLValid::GrammarNotFound
  +          , XMLUni::fgZeroLenString
  +        );
  +    }
  +
  +    DTDScanner dtdScanner((DTDGrammar*)fGrammar, fEntityDeclPool, fDocTypeHandler);
  +    dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);
  +
  +    if (fDocTypeHandler)
  +        fDocTypeHandler->resetDocType();
  +
  +    // There must be some space after DOCTYPE
  +    if (!fReaderMgr.skipPastSpaces())
  +    {
  +        emitError(XMLErrs::ExpectedWhitespace);
  +
  +        // Just skip the Doctype declaration and return
  +        fReaderMgr.skipPastChar(chCloseAngle);
  +        return;
  +    }
  +
  +    // Get a buffer for the root element
  +    XMLBufBid bbRootName(&fBufMgr);
  +
  +    //
  +    //  Get a name from the input, which should be the name of the root
  +    //  element of the upcoming content.
  +    //
  +    fReaderMgr.getName(bbRootName.getBuffer());
  +    if (bbRootName.isEmpty())
  +    {
  +        emitError(XMLErrs::NoRootElemInDOCTYPE);
  +        fReaderMgr.skipPastChar(chCloseAngle);
  +        return;
  +    }
  +
  +    //
  +    //  Store the root element name for later check
  +    //
  +    setRootElemName(bbRootName.getRawBuffer());
  +
  +    //
  +    //  This element obviously is not going to exist in the element decl
  +    //  pool yet, but we need to call docTypeDecl. So force it into
  +    //  the element decl pool, marked as being there because it was in
  +    //  the DOCTYPE. Later, when it's declared, the status will be updated.
  +    //
  +    //  Only do this if we are not reusing the validator! If we are reusing,
  +    //  then look it up instead. It has to exist!
  +    //
  +    DTDElementDecl* rootDecl;
  +    Janitor<DTDElementDecl> janSrc(0);
  +
  +    if (fReuseGrammar)
  +    {
  +        if (fGrammar->getGrammarType() == Grammar::DTDGrammarType) {
  +            rootDecl = (DTDElementDecl*) fGrammar->getElemDecl(fEmptyNamespaceId, 0, bbRootName.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);
  +            if (rootDecl)
  +                ((DTDGrammar*)fGrammar)->setRootElemId(rootDecl->getId());
  +            else {
  +                rootDecl = new DTDElementDecl(bbRootName.getRawBuffer(), fEmptyNamespaceId);
  +                rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
  +                rootDecl->setExternalElemDeclaration(true);
  +                ((DTDGrammar*)fGrammar)->setRootElemId(fGrammar->putElemDecl(rootDecl));
  +            }
  +        }
  +        else {
  +            rootDecl = new DTDElementDecl(bbRootName.getRawBuffer(), fEmptyNamespaceId);
  +            rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
  +            rootDecl->setExternalElemDeclaration(true);
  +            janSrc.reset(rootDecl);
  +        }
  +    }
  +     else
  +    {
  +        rootDecl = new DTDElementDecl(bbRootName.getRawBuffer(), fEmptyNamespaceId);
  +        rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
  +        rootDecl->setExternalElemDeclaration(true);
  +        ((DTDGrammar*)fGrammar)->setRootElemId(fGrammar->putElemDecl(rootDecl));
  +    }
  +
  +    // Skip any spaces after the name
  +    fReaderMgr.skipPastSpaces();
  +
  +    //
  +    //  And now if we are looking at a >, then we are done. It is not
  +    //  required to have an internal or external subset, though why you
  +    //  would not escapes me.
  +    //
  +    if (fReaderMgr.skippedChar(chCloseAngle)) {
  +        //
  +        //  If we have a doc type handler and advanced callbacks are enabled,
  +        //  call the doctype event.
  +        //
  +        if (fDocTypeHandler)
  +            fDocTypeHandler->doctypeDecl(*rootDecl, 0, 0, false);
  +        return;
  +    }
  +
  +    // either internal/external subset
  +    if(!fReuseGrammar) {
  +        if (fValScheme == Val_Auto && !fValidate)
  +            fValidate = true;
  +    }
  +
  +
  +    bool    hasIntSubset = false;
  +    bool    hasExtSubset = false;
  +    XMLCh*  sysId = 0;
  +    XMLCh*  pubId = 0;
  +
  +    //
  +    //  If the next character is '[' then we have no external subset cause
  +    //  there is no system id, just the opening character of the internal
  +    //  subset. Else, has to be an id.
  +    //
  +    // Just look at the next char, don't eat it.
  +    if (fReaderMgr.peekNextChar() == chOpenSquare)
  +    {
  +        hasIntSubset = true;
  +    }
  +     else
  +    {
  +        // Indicate we have an external subset
  +        hasExtSubset = true;
  +        fHasNoDTD = false;
  +
  +        // Get buffers for the ids
  +        XMLBufBid bbPubId(&fBufMgr);
  +        XMLBufBid bbSysId(&fBufMgr);
  +
  +        // Get the external subset id
  +        if (!dtdScanner.scanId(bbPubId.getBuffer(), bbSysId.getBuffer(), DTDScanner::IDType_External))
  +        {
  +            fReaderMgr.skipPastChar(chCloseAngle);
  +            return;
  +        }
  +
  +        // Get copies of the ids we got
  +        pubId = XMLString::replicate(bbPubId.getRawBuffer());
  +        sysId = XMLString::replicate(bbSysId.getRawBuffer());
  +
  +        // Skip spaces and check again for the opening of an internal subset
  +        fReaderMgr.skipPastSpaces();
  +
  +        // Just look at the next char, don't eat it.
  +        if (fReaderMgr.peekNextChar() == chOpenSquare) {
  +            hasIntSubset = true;
  +        }
  +    }
  +
  +    // Ensure that the ids get cleaned up, if they got allocated
  +    ArrayJanitor<XMLCh> janSysId(sysId);
  +    ArrayJanitor<XMLCh> janPubId(pubId);
  +
  +    //
  +    //  If we have a doc type handler and advanced callbacks are enabled,
  +    //  call the doctype event.
  +    //
  +    if (fDocTypeHandler)
  +        fDocTypeHandler->doctypeDecl(*rootDecl, pubId, sysId, hasIntSubset);
  +
  +    //
  +    //  Ok, if we had an internal subset, we are just past the [ character
  +    //  and need to parse that first.
  +    //
  +    if (hasIntSubset)
  +    {
  +        // Eat the opening square bracket
  +        fReaderMgr.getNextChar();
  +
  +        // We can't have any internal subset if we are reusing the validator
  +        if (fReuseGrammar)
  +            ThrowXML(RuntimeException, XMLExcepts::Val_CantHaveIntSS);
  +
  +        //
  +        //  And try to scan the internal subset. If we fail, try to recover
  +        //  by skipping forward to the close angle and returning.
  +        //
  +        if (!dtdScanner.scanInternalSubset())
  +        {
  +            fReaderMgr.skipPastChar(chCloseAngle);
  +            return;
  +        }
  +
  +        //
  +        //  Do a sanity check that some expanded PE did not propagate out of
  +        //  the doctype. This could happen if it was terminated early by bad
  +        //  syntax.
  +        //
  +        if (fReaderMgr.getReaderDepth() > 1)
  +        {
  +            emitError(XMLErrs::PEPropogated);
  +
  +            // Ask the reader manager to pop back down to the main level
  +            fReaderMgr.cleanStackBackTo(1);
  +        }
  +
  +        fReaderMgr.skipPastSpaces();
  +    }
  +
  +    // And that should leave us at the closing > of the DOCTYPE line
  +    if (!fReaderMgr.skippedChar(chCloseAngle))
  +    {
  +        //
  +        //  Do a special check for the common scenario of an extra ] char at
  +        //  the end. This is easy to recover from.
  +        //
  +        if (fReaderMgr.skippedChar(chCloseSquare)
  +        &&  fReaderMgr.skippedChar(chCloseAngle))
  +        {
  +            emitError(XMLErrs::ExtraCloseSquare);
  +        }
  +         else
  +        {
  +            emitError(XMLErrs::UnterminatedDOCTYPE);
  +            fReaderMgr.skipPastChar(chCloseAngle);
  +        }
  +    }
  +
  +    //
  +    //  If we had an external subset, then deal with that one next. Don't scan
  +    //  it if reusing the grammar, or if loading is disabled and not validating.
  +    //
  +    if (hasExtSubset && !fReuseGrammar && (fLoadExternalDTD || fValidate))
  +    {
  +        // And now create a reader to read this entity
  +        InputSource* srcUsed;
  +        XMLReader* reader = fReaderMgr.createReader
  +        (
  +            sysId
  +            , pubId
  +            , false
  +            , XMLReader::RefFrom_NonLiteral
  +            , XMLReader::Type_General
  +            , XMLReader::Source_External
  +            , srcUsed
  +        );
  +
  +        // Put a janitor on the input source
  +        Janitor<InputSource> janSrc(srcUsed);
  +
  +        //
  +        //  If it failed then throw an exception
  +        //
  +        if (!reader)
  +            ThrowXML1(RuntimeException, XMLExcepts::Gen_CouldNotOpenDTD, srcUsed->getSystemId());
  +
  +        //
  +        //  In order to make the processing work consistently, we have to
  +        //  make this look like an external entity. So create an entity
  +        //  decl and fill it in and push it with the reader, as happens
  +        //  with an external entity. Put a janitor on it to ensure it gets
  +        //  cleaned up. The reader manager does not adopt them.
  +        //
  +        const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull };
  +        DTDEntityDecl* declDTD = new DTDEntityDecl(gDTDStr);
  +        declDTD->setSystemId(sysId);
  +        Janitor<DTDEntityDecl> janDecl(declDTD);
  +
  +        // Mark this one as a throw at end
  +        reader->setThrowAtEnd(true);
  +
  +        // And push it onto the stack, with its pseudo name
  +        fReaderMgr.pushReader(reader, declDTD);
  +
  +        // Tell it it's not in an include section
  +        dtdScanner.scanExtSubsetDecl(false);
       }
   }
   
  
  
  
  1.5       +26 -8     xml-xerces/c/src/xercesc/internal/XMLScanner.hpp
  
  Index: XMLScanner.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.hpp,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- XMLScanner.hpp	27 May 2002 18:42:14 -0000	1.4
  +++ XMLScanner.hpp	30 May 2002 16:20:57 -0000	1.5
  @@ -56,6 +56,9 @@
   
   /*
    * $Log: XMLScanner.hpp,v $
  + * Revision 1.5  2002/05/30 16:20:57  tng
  + * Add feature to optionally ignore external DTD.
  + *
    * Revision 1.4  2002/05/27 18:42:14  tng
    * To get ready for 64 bit large file, use XMLSSize_t to represent line and column number.
    *
  @@ -396,6 +399,7 @@
       bool getHasNoDTD() const;
       XMLCh* getExternalSchemaLocation() const;
       XMLCh* getExternalNoNamespaceSchemaLocation() const;
  +    bool getLoadExternalDTD() const;
   
       // -----------------------------------------------------------------------
       //  Getter methods
  @@ -492,6 +496,7 @@
       void setExternalNoNamespaceSchemaLocation(const XMLCh* const noNamespaceSchemaLocation);
       void setExternalSchemaLocation(const char* const schemaLocation);
       void setExternalNoNamespaceSchemaLocation(const char* const noNamespaceSchemaLocation);
  +    void setLoadExternalDTD(const bool loadDTD);
   
       // -----------------------------------------------------------------------
       //  Mutator methods
  @@ -503,7 +508,7 @@
       //  setValidationScheme() instead.
       // -----------------------------------------------------------------------
       bool getDoValidation() const;
  -    void setDoValidation(const bool validate, const bool setValScheme = true);
  +    void setDoValidation(const bool validate);
   
   
       // -----------------------------------------------------------------------
  @@ -683,6 +688,7 @@
                   XMLBuffer&  toFill
           , const XMLCh       chEndChar
       );
  +    void scanDocTypeDecl();
   
       // -----------------------------------------------------------------------
       //  Private helper methods
  @@ -908,6 +914,9 @@
       //      The no target namespace XML Schema Location that was specified
       //      externally using setExternalNoNamespaceSchemaLocation.
       //
  +    //  fLoadExternalDTD
  +    //      This flag indicates whether the external DTD should be loaded or not
  +    //
       // -----------------------------------------------------------------------
       bool                        fDoNamespaces;
       bool                        fExitOnFirstFatal;
  @@ -964,6 +973,7 @@
       XMLCh*                      fRootElemName;
       XMLCh*                      fExternalSchemaLocation;
       XMLCh*                      fExternalNoNamespaceSchemaLocation;
  +    bool                        fLoadExternalDTD;
   };
   
   
  @@ -1162,6 +1172,11 @@
       return fExternalNoNamespaceSchemaLocation;
   }
   
  +inline bool XMLScanner::getLoadExternalDTD() const
  +{
  +    return fLoadExternalDTD;
  +}
  +
   // ---------------------------------------------------------------------------
   //  XMLScanner: Setter methods
   // ---------------------------------------------------------------------------
  @@ -1281,6 +1296,11 @@
       fExternalNoNamespaceSchemaLocation = XMLString::transcode(noNamespaceSchemaLocation);
   }
   
  +inline void XMLScanner::setLoadExternalDTD(const bool loadDTD)
  +{
  +    fLoadExternalDTD = loadDTD;
  +}
  +
   
   // ---------------------------------------------------------------------------
   //  XMLScanner: Mutator methods
  @@ -1299,15 +1319,13 @@
       return fValidate;
   }
   
  -inline void XMLScanner::setDoValidation(const bool validate, const bool setValScheme)
  +inline void XMLScanner::setDoValidation(const bool validate)
   {
       fValidate = validate;
  -    if (setValScheme) {
  -        if (fValidate)
  -            fValScheme = Val_Always;
  -        else
  -            fValScheme = Val_Never;
  -    }
  +    if (fValidate)
  +        fValScheme = Val_Always;
  +    else
  +        fValScheme = Val_Never;
   }
   
   #endif
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org