You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by tn...@apache.org on 2002/05/30 18:20:57 UTC
cvs commit: xml-xerces/c/src/xercesc/internal XMLScanner.cpp XMLScanner.hpp
tng 02/05/30 09:20:57
Modified: c/src/xercesc/internal XMLScanner.cpp XMLScanner.hpp
Log:
Add feature to optionally ignore external DTD.
Revision Changes Path
1.10 +300 -23 xml-xerces/c/src/xercesc/internal/XMLScanner.cpp
Index: XMLScanner.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.cpp,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- XMLScanner.cpp 28 May 2002 20:42:38 -0000 1.9
+++ XMLScanner.cpp 30 May 2002 16:20:57 -0000 1.10
@@ -55,7 +55,7 @@
*/
/*
- * $Id: XMLScanner.cpp,v 1.9 2002/05/28 20:42:38 tng Exp $
+ * $Id: XMLScanner.cpp,v 1.10 2002/05/30 16:20:57 tng Exp $
*/
@@ -221,6 +221,7 @@
, fRootElemName(0)
, fExternalSchemaLocation(0)
, fExternalNoNamespaceSchemaLocation(0)
+ , fLoadExternalDTD(true)
{
commonInit();
@@ -284,6 +285,7 @@
, fRootElemName(0)
, fExternalSchemaLocation(0)
, fExternalNoNamespaceSchemaLocation(0)
+ , fLoadExternalDTD(true)
{
commonInit();
@@ -2215,28 +2217,7 @@
}
else if (fReaderMgr.skippedString(XMLUni::fgDocTypeString))
{
- if (!fReuseGrammar && fValidatorFromUser && !fValidator->handlesDTD())
- {
- ThrowXML(RuntimeException, XMLExcepts::Gen_NoDTDValidator);
- }
-
- //
- // We have a doc type. So, create a DTDScanner and
- // switch the Grammar to the emptyNamespace one.
- //
-
- if (!switchGrammar(XMLUni::fgZeroLenString) && fValidate)
- {
- fValidator->emitError
- (
- XMLValid::GrammarNotFound
- , XMLUni::fgZeroLenString
- );
- }
-
- DTDScanner fDTDScanner((DTDGrammar*)fGrammar, fEntityDeclPool, fDocTypeHandler);
- fDTDScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);
- fDTDScanner.scanDocTypeDecl(fReuseGrammar);
+ scanDocTypeDecl();
// if reusing grammar, this has been validated already in first scan
// skip for performance
@@ -2294,6 +2275,302 @@
, "in prolog"
);
}
+ }
+}
+
+//
+// This method handles the high level logic of scanning the DOCTYPE
+// declaration. This calls the DTDScanner and kicks off both the scanning of
+// the internal subset and the scanning of the external subset, if any.
+//
+// When we get here the '<!DOCTYPE' part has already been scanned, which is
+// what told us that we had a doc type decl to parse.
+//
+
+void XMLScanner::scanDocTypeDecl()
+{
+ if (!fReuseGrammar && fValidatorFromUser && !fValidator->handlesDTD())
+ {
+ ThrowXML(RuntimeException, XMLExcepts::Gen_NoDTDValidator);
+ }
+
+ //
+ // We have a doc type. So, create a DTDScanner and
+ // switch the Grammar to the emptyNamespace one.
+ //
+
+ if (!switchGrammar(XMLUni::fgZeroLenString) && fValidate)
+ {
+ fValidator->emitError
+ (
+ XMLValid::GrammarNotFound
+ , XMLUni::fgZeroLenString
+ );
+ }
+
+ DTDScanner dtdScanner((DTDGrammar*)fGrammar, fEntityDeclPool, fDocTypeHandler);
+ dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);
+
+ if (fDocTypeHandler)
+ fDocTypeHandler->resetDocType();
+
+ // There must be some space after DOCTYPE
+ if (!fReaderMgr.skipPastSpaces())
+ {
+ emitError(XMLErrs::ExpectedWhitespace);
+
+ // Just skip the Doctype declaration and return
+ fReaderMgr.skipPastChar(chCloseAngle);
+ return;
+ }
+
+ // Get a buffer for the root element
+ XMLBufBid bbRootName(&fBufMgr);
+
+ //
+ // Get a name from the input, which should be the name of the root
+ // element of the upcoming content.
+ //
+ fReaderMgr.getName(bbRootName.getBuffer());
+ if (bbRootName.isEmpty())
+ {
+ emitError(XMLErrs::NoRootElemInDOCTYPE);
+ fReaderMgr.skipPastChar(chCloseAngle);
+ return;
+ }
+
+ //
+ // Store the root element name for later check
+ //
+ setRootElemName(bbRootName.getRawBuffer());
+
+ //
+ // This element obviously is not going to exist in the element decl
+ // pool yet, but we need to call docTypeDecl. So force it into
+ // the element decl pool, marked as being there because it was in
+ // the DOCTYPE. Later, when it's declared, the status will be updated.
+ //
+ // Only do this if we are not reusing the validator! If we are reusing,
+ // then look it up instead. It has to exist!
+ //
+ DTDElementDecl* rootDecl;
+ Janitor<DTDElementDecl> janSrc(0);
+
+ if (fReuseGrammar)
+ {
+ if (fGrammar->getGrammarType() == Grammar::DTDGrammarType) {
+ rootDecl = (DTDElementDecl*) fGrammar->getElemDecl(fEmptyNamespaceId, 0, bbRootName.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);
+ if (rootDecl)
+ ((DTDGrammar*)fGrammar)->setRootElemId(rootDecl->getId());
+ else {
+ rootDecl = new DTDElementDecl(bbRootName.getRawBuffer(), fEmptyNamespaceId);
+ rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
+ rootDecl->setExternalElemDeclaration(true);
+ ((DTDGrammar*)fGrammar)->setRootElemId(fGrammar->putElemDecl(rootDecl));
+ }
+ }
+ else {
+ rootDecl = new DTDElementDecl(bbRootName.getRawBuffer(), fEmptyNamespaceId);
+ rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
+ rootDecl->setExternalElemDeclaration(true);
+ janSrc.reset(rootDecl);
+ }
+ }
+ else
+ {
+ rootDecl = new DTDElementDecl(bbRootName.getRawBuffer(), fEmptyNamespaceId);
+ rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
+ rootDecl->setExternalElemDeclaration(true);
+ ((DTDGrammar*)fGrammar)->setRootElemId(fGrammar->putElemDecl(rootDecl));
+ }
+
+ // Skip any spaces after the name
+ fReaderMgr.skipPastSpaces();
+
+ //
+ // And now if we are looking at a >, then we are done. It is not
+ // required to have an internal or external subset, though why you
+ // would not escapes me.
+ //
+ if (fReaderMgr.skippedChar(chCloseAngle)) {
+ //
+ // If we have a doc type handler and advanced callbacks are enabled,
+ // call the doctype event.
+ //
+ if (fDocTypeHandler)
+ fDocTypeHandler->doctypeDecl(*rootDecl, 0, 0, false);
+ return;
+ }
+
+ // either internal/external subset
+ if(!fReuseGrammar) {
+ if (fValScheme == Val_Auto && !fValidate)
+ fValidate = true;
+ }
+
+
+ bool hasIntSubset = false;
+ bool hasExtSubset = false;
+ XMLCh* sysId = 0;
+ XMLCh* pubId = 0;
+
+ //
+ // If the next character is '[' then we have no external subset because
+ // there is no system id, just the opening character of the internal
+ // subset. Else, has to be an id.
+ //
+ // Just look at the next char, don't eat it.
+ if (fReaderMgr.peekNextChar() == chOpenSquare)
+ {
+ hasIntSubset = true;
+ }
+ else
+ {
+ // Indicate we have an external subset
+ hasExtSubset = true;
+ fHasNoDTD = false;
+
+ // Get buffers for the ids
+ XMLBufBid bbPubId(&fBufMgr);
+ XMLBufBid bbSysId(&fBufMgr);
+
+ // Get the external subset id
+ if (!dtdScanner.scanId(bbPubId.getBuffer(), bbSysId.getBuffer(), DTDScanner::IDType_External))
+ {
+ fReaderMgr.skipPastChar(chCloseAngle);
+ return;
+ }
+
+ // Get copies of the ids we got
+ pubId = XMLString::replicate(bbPubId.getRawBuffer());
+ sysId = XMLString::replicate(bbSysId.getRawBuffer());
+
+ // Skip spaces and check again for the opening of an internal subset
+ fReaderMgr.skipPastSpaces();
+
+ // Just look at the next char, don't eat it.
+ if (fReaderMgr.peekNextChar() == chOpenSquare) {
+ hasIntSubset = true;
+ }
+ }
+
+ // Ensure that the ids get cleaned up, if they got allocated
+ ArrayJanitor<XMLCh> janSysId(sysId);
+ ArrayJanitor<XMLCh> janPubId(pubId);
+
+ //
+ // If we have a doc type handler and advanced callbacks are enabled,
+ // call the doctype event.
+ //
+ if (fDocTypeHandler)
+ fDocTypeHandler->doctypeDecl(*rootDecl, pubId, sysId, hasIntSubset);
+
+ //
+ // Ok, if we had an internal subset, we are positioned at the [ character
+ // and need to parse that first.
+ //
+ if (hasIntSubset)
+ {
+ // Eat the opening square bracket
+ fReaderMgr.getNextChar();
+
+ // We can't have any internal subset if we are reusing the validator
+ if (fReuseGrammar)
+ ThrowXML(RuntimeException, XMLExcepts::Val_CantHaveIntSS);
+
+ //
+ // And try to scan the internal subset. If we fail, try to recover
+ // by skipping forward to the close angle and returning.
+ //
+ if (!dtdScanner.scanInternalSubset())
+ {
+ fReaderMgr.skipPastChar(chCloseAngle);
+ return;
+ }
+
+ //
+ // Do a sanity check that some expanded PE did not propagate out of
+ // the doctype. This could happen if it was terminated early by bad
+ // syntax.
+ //
+ if (fReaderMgr.getReaderDepth() > 1)
+ {
+ emitError(XMLErrs::PEPropogated);
+
+ // Ask the reader manager to pop back down to the main level
+ fReaderMgr.cleanStackBackTo(1);
+ }
+
+ fReaderMgr.skipPastSpaces();
+ }
+
+ // And that should leave us at the closing > of the DOCTYPE line
+ if (!fReaderMgr.skippedChar(chCloseAngle))
+ {
+ //
+ // Do a special check for the common scenario of an extra ] char at
+ // the end. This is easy to recover from.
+ //
+ if (fReaderMgr.skippedChar(chCloseSquare)
+ && fReaderMgr.skippedChar(chCloseAngle))
+ {
+ emitError(XMLErrs::ExtraCloseSquare);
+ }
+ else
+ {
+ emitError(XMLErrs::UnterminatedDOCTYPE);
+ fReaderMgr.skipPastChar(chCloseAngle);
+ }
+ }
+
+ //
+ // If we had an external subset, then we need to deal with that one
+ // next. If we are reusing the validator, then don't scan it.
+ //
+ if (hasExtSubset && !fReuseGrammar && (fLoadExternalDTD || fValidate))
+ {
+ // And now create a reader to read this entity
+ InputSource* srcUsed;
+ XMLReader* reader = fReaderMgr.createReader
+ (
+ sysId
+ , pubId
+ , false
+ , XMLReader::RefFrom_NonLiteral
+ , XMLReader::Type_General
+ , XMLReader::Source_External
+ , srcUsed
+ );
+
+ // Put a janitor on the input source
+ Janitor<InputSource> janSrc(srcUsed);
+
+ //
+ // If it failed then throw an exception
+ //
+ if (!reader)
+ ThrowXML1(RuntimeException, XMLExcepts::Gen_CouldNotOpenDTD, srcUsed->getSystemId());
+
+ //
+ // In order to make the processing work consistently, we have to
+ // make this look like an external entity. So create an entity
+ // decl and fill it in and push it with the reader, as happens
+ // with an external entity. Put a janitor on it to ensure it gets
+ // cleaned up. The reader manager does not adopt them.
+ //
+ const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull };
+ DTDEntityDecl* declDTD = new DTDEntityDecl(gDTDStr);
+ declDTD->setSystemId(sysId);
+ Janitor<DTDEntityDecl> janDecl(declDTD);
+
+ // Mark this one as a throw at end
+ reader->setThrowAtEnd(true);
+
+ // And push it onto the stack, with its pseudo name
+ fReaderMgr.pushReader(reader, declDTD);
+
+ // Tell it it's not in an include section
+ dtdScanner.scanExtSubsetDecl(false);
}
}
1.5 +26 -8 xml-xerces/c/src/xercesc/internal/XMLScanner.hpp
Index: XMLScanner.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLScanner.hpp,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- XMLScanner.hpp 27 May 2002 18:42:14 -0000 1.4
+++ XMLScanner.hpp 30 May 2002 16:20:57 -0000 1.5
@@ -56,6 +56,9 @@
/*
* $Log: XMLScanner.hpp,v $
+ * Revision 1.5 2002/05/30 16:20:57 tng
+ * Add feature to optionally ignore external DTD.
+ *
* Revision 1.4 2002/05/27 18:42:14 tng
* To get ready for 64 bit large file, use XMLSSize_t to represent line and column number.
*
@@ -396,6 +399,7 @@
bool getHasNoDTD() const;
XMLCh* getExternalSchemaLocation() const;
XMLCh* getExternalNoNamespaceSchemaLocation() const;
+ bool getLoadExternalDTD() const;
// -----------------------------------------------------------------------
// Getter methods
@@ -492,6 +496,7 @@
void setExternalNoNamespaceSchemaLocation(const XMLCh* const noNamespaceSchemaLocation);
void setExternalSchemaLocation(const char* const schemaLocation);
void setExternalNoNamespaceSchemaLocation(const char* const noNamespaceSchemaLocation);
+ void setLoadExternalDTD(const bool loadDTD);
// -----------------------------------------------------------------------
// Mutator methods
@@ -503,7 +508,7 @@
// setValidationScheme() instead.
// -----------------------------------------------------------------------
bool getDoValidation() const;
- void setDoValidation(const bool validate, const bool setValScheme = true);
+ void setDoValidation(const bool validate);
// -----------------------------------------------------------------------
@@ -683,6 +688,7 @@
XMLBuffer& toFill
, const XMLCh chEndChar
);
+ void scanDocTypeDecl();
// -----------------------------------------------------------------------
// Private helper methods
@@ -908,6 +914,9 @@
// The no target namespace XML Schema Location that was specified
// externally using setExternalNoNamespaceSchemaLocation.
//
+ // fLoadExternalDTD
+ // This flag indicates whether the external DTD should be loaded or not
+ //
// -----------------------------------------------------------------------
bool fDoNamespaces;
bool fExitOnFirstFatal;
@@ -964,6 +973,7 @@
XMLCh* fRootElemName;
XMLCh* fExternalSchemaLocation;
XMLCh* fExternalNoNamespaceSchemaLocation;
+ bool fLoadExternalDTD;
};
@@ -1162,6 +1172,11 @@
return fExternalNoNamespaceSchemaLocation;
}
+inline bool XMLScanner::getLoadExternalDTD() const
+{
+ return fLoadExternalDTD;
+}
+
// ---------------------------------------------------------------------------
// XMLScanner: Setter methods
// ---------------------------------------------------------------------------
@@ -1281,6 +1296,11 @@
fExternalNoNamespaceSchemaLocation = XMLString::transcode(noNamespaceSchemaLocation);
}
+inline void XMLScanner::setLoadExternalDTD(const bool loadDTD)
+{
+ fLoadExternalDTD = loadDTD;
+}
+
// ---------------------------------------------------------------------------
// XMLScanner: Mutator methods
@@ -1299,15 +1319,13 @@
return fValidate;
}
-inline void XMLScanner::setDoValidation(const bool validate, const bool setValScheme)
+inline void XMLScanner::setDoValidation(const bool validate)
{
fValidate = validate;
- if (setValScheme) {
- if (fValidate)
- fValScheme = Val_Always;
- else
- fValScheme = Val_Never;
- }
+ if (fValidate)
+ fValScheme = Val_Always;
+ else
+ fValScheme = Val_Never;
}
#endif
---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org