You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/05/20 20:14:47 UTC
svn commit: r1125493 -
/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/SecureContentHandler.java
Author: jukka
Date: Fri May 20 18:14:47 2011
New Revision: 1125493
URL: http://svn.apache.org/viewvc?rev=1125493&view=rev
Log:
TIKA-259: Safe parsing of droste.zip
Add a (default) limit of at most 30 levels of XML nesting in parser output. That should be enough for all normal documents, but catches droste.zip quite nicely.
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/SecureContentHandler.java
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/SecureContentHandler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/SecureContentHandler.java?rev=1125493&r1=1125492&r2=1125493&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/sax/SecureContentHandler.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/sax/SecureContentHandler.java Fri May 20 18:14:47 2011
@@ -20,6 +20,7 @@ import java.io.IOException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
+import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -28,9 +29,9 @@ import org.xml.sax.SAXException;
* attacks against Tika parsers.
* <p>
* Currently this class simply compares the number of output characters
- * to to the number of input bytes, and throws an exception if the output
- * is truly excessive when compared to the input. This is a strong indication
- * of a zip bomb.
+ * to the number of input bytes and keeps track of the XML nesting levels.
+ * An exception gets thrown if the output seems excessive compared to the
+ * input document. This is a strong indication of a zip bomb.
*
* @since Apache Tika 0.4
* @see <a href="https://issues.apache.org/jira/browse/TIKA-216">TIKA-216</a>
@@ -48,6 +49,11 @@ public class SecureContentHandler extend
private long characterCount = 0;
/**
+ * The current XML element depth.
+ */
+ private int currentDepth = 0;
+
+ /**
* Output threshold.
*/
private long threshold = 1000000;
@@ -58,6 +64,11 @@ public class SecureContentHandler extend
private long ratio = 100;
/**
+ * Maximum XML element nesting level.
+ */
+ private int maxDepth = 30;
+
+ /**
* Decorates the given content handler with zip bomb prevention based
* on the count of bytes read from the given counting input stream.
* The resulting decorator can be passed to a Tika parser along with
@@ -117,6 +128,26 @@ public class SecureContentHandler extend
}
/**
+ * Returns the maximum XML element nesting level.
+ *
+ * @return maximum XML element nesting level
+ */
+ public int getMaximumDepth() {
+ return maxDepth;
+ }
+
+
+ /**
+ * Sets the maximum XML element nesting level. If this depth level is
+ * exceeded then an exception gets thrown.
+ *
+ * @param depth maximum XML element nesting level
+ */
+ public void setMaximumDepth(int depth) {
+ this.maxDepth = depth;
+ }
+
+ /**
* Converts the given {@link SAXException} to a corresponding
* {@link TikaException} if it's caused by this instance detecting
* a zip bomb.
@@ -156,11 +187,35 @@ public class SecureContentHandler extend
long byteCount = getByteCount();
if (characterCount > threshold
&& characterCount > byteCount * ratio) {
- throw new SecureSAXException(byteCount);
+ throw new SecureSAXException(
+ "Suspected zip bomb: "
+ + byteCount + " input bytes produced "
+ + characterCount + " output characters");
}
}
@Override
+ public void startElement(
+ String uri, String localName, String name, Attributes atts)
+ throws SAXException {
+ currentDepth++;
+ if (currentDepth < maxDepth) {
+ super.startElement(uri, localName, name, atts);
+ } else {
+ throw new SecureSAXException(
+ "Suspected zip bomb: "
+ + currentDepth + " levels of XML element nesting");
+ }
+ }
+
+ @Override
+ public void endElement(
+ String uri, String localName, String name) throws SAXException {
+ currentDepth--;
+ super.endElement(uri, localName, name);
+ }
+
+ @Override
public void characters(char[] ch, int start, int length)
throws SAXException {
advance(length);
@@ -181,10 +236,11 @@ public class SecureContentHandler extend
*/
private class SecureSAXException extends SAXException {
- public SecureSAXException(long byteCount) throws SAXException {
- super("Suspected zip bomb: "
- + byteCount + " input bytes produced "
- + characterCount + " output characters");
+ /** Serial version UID.*/
+ private static final long serialVersionUID = 2285245380321771445L;
+
+ public SecureSAXException(String message) throws SAXException {
+ super(message);
}
public boolean isCausedBy(SecureContentHandler handler) {