You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by sk...@apache.org on 2005/02/06 12:33:25 UTC
svn commit: r151579 - in
jakarta/commons/proper/digester/branches/digester2/src/java/org/apache/commons/digester2:
Digester.java SAXHandler.java
Author: skitching
Date: Sun Feb 6 03:33:22 2005
New Revision: 151579
URL: http://svn.apache.org/viewcvs?view=rev&rev=151579
Log:
* setKnownEntities/registerKnownEntities: a target URL of ""
now means resolve the entity to an empty InputSource.
* added methods setIgnoreExternalDTD/getIgnoreExternalDTD
Modified:
jakarta/commons/proper/digester/branches/digester2/src/java/org/apache/commons/digester2/Digester.java
jakarta/commons/proper/digester/branches/digester2/src/java/org/apache/commons/digester2/SAXHandler.java
Modified: jakarta/commons/proper/digester/branches/digester2/src/java/org/apache/commons/digester2/Digester.java
URL: http://svn.apache.org/viewcvs/jakarta/commons/proper/digester/branches/digester2/src/java/org/apache/commons/digester2/Digester.java?view=diff&r1=151578&r2=151579
==============================================================================
--- jakarta/commons/proper/digester/branches/digester2/src/java/org/apache/commons/digester2/Digester.java (original)
+++ jakarta/commons/proper/digester/branches/digester2/src/java/org/apache/commons/digester2/Digester.java Sun Feb 6 03:33:22 2005
@@ -380,6 +380,10 @@
* default Entity Resolver when resolving entities in the input xml
* (including the DTD or schema specified with the DOCTYPE).
* <p>
+ * If the value in a map entry (ie the "URI") is an empty string, then
+ * when the parser asks for the entity to be resolved, an empty InputSource
+ * will be returned, effectively ignoring the entity.
+ * <p>
* See {@link #getKnownEntities}, and {@link #setEntityResolver}.
*/
public void setKnownEntities(Map knownEntities) {
@@ -398,6 +402,10 @@
* done for the input document's DTD, so that the DTD can be retrieved
* from a local file.</p>
*
+ * <p>If the value in a map entry (ie the "URI") is an empty string, then
+ * when the parser asks for the entity to be resolved, an empty InputSource
+ * will be returned, effectively ignoring the entity.</p>
+ *
* <p>This implementation provides only basic functionality. If more
* sophisticated features are required,using {@link #setEntityResolver} to
* set a custom resolver is recommended. Note in particular that if the
@@ -429,6 +437,27 @@
return saxHandler.getKnownEntities();
}
+ /**
+ * Specify whether an external DTD should be ignored, ie treated as if
+ * it were an empty file. This can be dangerous; DTDs can potentially
+ * contain definitions for default attribute values and entities that
+ * affect the meaning of the xml document, so skipping them can cause
+ * incorrect output. However in many cases it is known that the DTD
+ * does no such thing, so processing of it can be suppressed.
+ * <p>
+ * This flag defaults to false (ie external dtds are read during the parse).
+ */
+ public void setIgnoreExternalDTD(boolean state) {
+ saxHandler.setIgnoreExternalDTD(state);
+ }
+
+ /**
+ * See setIgnoreExternalDTD.
+ */
+ public boolean getIgnoreExternalDTD() {
+ return saxHandler.getIgnoreExternalDTD();
+ }
+
// ------------------------------------------------------- Public Methods
/**
Modified: jakarta/commons/proper/digester/branches/digester2/src/java/org/apache/commons/digester2/SAXHandler.java
URL: http://svn.apache.org/viewcvs/jakarta/commons/proper/digester/branches/digester2/src/java/org/apache/commons/digester2/SAXHandler.java?view=diff&r1=151578&r2=151579
==============================================================================
--- jakarta/commons/proper/digester/branches/digester2/src/java/org/apache/commons/digester2/SAXHandler.java (original)
+++ jakarta/commons/proper/digester/branches/digester2/src/java/org/apache/commons/digester2/SAXHandler.java Sun Feb 6 03:33:22 2005
@@ -23,6 +23,8 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
+import java.io.StringReader;
+
import java.lang.reflect.InvocationTargetException;
import java.util.EmptyStackException;
import java.util.HashMap;
@@ -126,6 +128,13 @@
private Locator locator = null;
/**
+ * A count of the number of entities resolved. Currently, we only
+ * care whether this is zero or one, so a boolean could do as well.
+ * However it seems likely that a count could be useful at some time.
+ */
+ private int numEntitiesResolved = 0;
+
+ /**
* A map of known external entities that input xml documents may refer to.
* via public or system IDs. The keys of the map entries are public or
* system IDs, and the values are URLs (typically local files) pointing
@@ -139,6 +148,11 @@
* See setAllowUnknownExternalEntities.
*/
private boolean allowUnknownExternalEntities = false;
+
+ /**
+ * See setIgnoreExternalDTD.
+ */
+ private boolean ignoreExternalDTD = false;
/**
* An object which contains state information that evolves
@@ -348,6 +362,7 @@
/**
* Get the system identifier of the DTD associated with the document
* currently being parsed, or most recently parsed.
+ *
* <p>
* If the input document has no DOCTYPE declaration, then null will
* be returned.
@@ -356,6 +371,10 @@
* the org.xml.sax.ext.LexicalHandler interface. If the parser does not
* provide callbacks via this interface, then no system id information
* will be available (null will be returned).
+ * <p>
+ * Note also that the SystemId value returned is exactly as it was
+ * defined in the DOCTYPE tag; relative URLs are NOT resolved relative
+ * to the base of the current document.
*/
public String getDTDSystemId() {
return this.dtdSystemId;
@@ -576,6 +595,10 @@
* Specifies a map of (publicId->URI) pairings that will be used when
* resolving entities in the input xml (including the DTD specified with
* DOCTYPE, or schema specified with xsi:schemaLocation).
+ * <p>
+ * If the value in a map entry (ie the "URI") is an empty string, then
+ * when the parser asks for the entity to be resolved, an empty InputSource
+ * will be returned, effectively ignoring the entity.
*/
public void setKnownEntities(Map knownEntities) {
this.knownEntities = knownEntities;
@@ -598,6 +621,10 @@
* done for the input document's DTD, so that the DTD can be retrieved
* from a local file.</p>
*
+ * <p>If the value in a map entry (ie the "URI") is an empty string, then
+ * when the parser asks for the entity to be resolved, an empty InputSource
+ * will be returned, effectively ignoring the entity.</p>
+ *
* <p>This implementation provides only basic functionality. If more
* sophisticated features are required,using {@link #setEntityResolver} to
* set a custom resolver is recommended. Note in particular that if the
@@ -646,6 +673,27 @@
}
/**
+ * Specify whether an external DTD should be ignored, ie treated as if
+ * it were an empty file. This can be dangerous; DTDs can potentially
+ * contain definitions for default attribute values and entities that
+ * affect the meaning of the xml document, so skipping them can cause
+ * incorrect output. However in many cases it is known that the DTD
+ * does no such thing, so processing of it can be suppressed.
+ * <p>
+ * This flag defaults to false (ie external dtds are read during the parse).
+ */
+ public void setIgnoreExternalDTD(boolean state) {
+ ignoreExternalDTD = state;
+ }
+
+ /**
+ * See setIgnoreExternalDTD.
+ */
+ public boolean getIgnoreExternalDTD() {
+ return ignoreExternalDTD;
+ }
+
+ /**
* Add a (pattern, action) pair to the RuleManager instance associated
* with this saxHandler. This is equivalent to
* <pre>
@@ -862,6 +910,7 @@
saxLog.debug("startDocument()");
}
+ numEntitiesResolved = 0;
dtdPublicId = null;
dtdSystemId = null;
@@ -1352,6 +1401,31 @@
}
}
+ // Keep count of the number of entities resolved. Currently, we only
+ // care whether this is zero or one, so a boolean could do as well.
+ // However it seems likely that a count could be useful at some time.
+ ++numEntitiesResolved;
+
+ // Is this the DTD? If there *is* a DTD (ie one was reported to the
+ // lexical handler) then it is presumed here that it will be the first
+ // entity resolved.
+ //
+ // Note that we can't just check whether this systemId is the same
+ // as the dtdSystemId, because the systemId parameter here has been
+ // expanded to an absolute ref, while the one passed to the
+ // LexicalHandler is in its original (possibly relative) form.
+ //
+ // It would be great to be able to use the EntityResolver2 interface
+ // which provides both the original and system ids, but that is
+ // probably not supported widely enough yet.
+ if ((numEntitiesResolved == 1) && (dtdSystemId != null)) {
+ if (ignoreExternalDTD) {
+ // this entity is the DTD, and the user wants to completely
+ // ignore it, so we return an "empty file".
+ return new InputSource(new StringReader(""));
+ }
+ }
+
// Has this public identifier been registered?
String entityURL = null;
if (publicId != null) {
@@ -1398,15 +1472,18 @@
+ " registered as a known entity, and systemId is null.");
}
- // Return an input source to our alternative URL
- if (log.isDebugEnabled()) {
- log.debug(" Resolving entity to '" + entityURL + "'");
- }
-
- try {
+ if (entityURL.length() == 0) {
+ // special case: when the user has mapped an empty to a URL being
+ // the empty string, we return an empty InputSource to the parser,
+ // effectively ignoring the entity.
+ return new InputSource(new StringReader(""));
+ } else {
+ // Return an input source to our alternative URL
+ if (log.isDebugEnabled()) {
+ log.debug(" Resolving entity to '" + entityURL + "'");
+ }
+
return new InputSource(entityURL);
- } catch (Exception e) {
- throw createSAXException(e);
}
}
@@ -1544,6 +1621,8 @@
* Invoked when the DOCTYPE tag is found in the input xml. The public
* and system ids present in that declaration are stored and can be
* retrieved later via the getDTDPublicId and getDTDSystemId methods.
+ * <p>
+ * This method is always preceded by startDocument.
*/
public void startDTD(String name, String publicId, String systemId) {
dtdPublicId = publicId;
@@ -1551,7 +1630,9 @@
}
/**
- * See {@link #startDTD}.
+ * See {@link #startDTD}.
+ * <p>
+ * This method always precedes the first startElement.
*/
public void endDTD() {
; // ignore
---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org