You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/05/20 20:14:47 UTC

svn commit: r1125493 - /tika/trunk/tika-core/src/main/java/org/apache/tika/sax/SecureContentHandler.java

Author: jukka
Date: Fri May 20 18:14:47 2011
New Revision: 1125493

URL: http://svn.apache.org/viewvc?rev=1125493&view=rev
Log:
TIKA-259: Safe parsing of droste.zip

Add a (default) limit of at most 30 levels of XML nesting in parser output. That should be enough for all normal documents, but catches droste.zip quite nicely.

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/sax/SecureContentHandler.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/SecureContentHandler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/SecureContentHandler.java?rev=1125493&r1=1125492&r2=1125493&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/sax/SecureContentHandler.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/sax/SecureContentHandler.java Fri May 20 18:14:47 2011
@@ -20,6 +20,7 @@ import java.io.IOException;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TikaInputStream;
+import org.xml.sax.Attributes;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
@@ -28,9 +29,9 @@ import org.xml.sax.SAXException;
  * attacks against Tika parsers.
  * <p>
  * Currently this class simply compares the number of output characters
- * to to the number of input bytes, and throws an exception if the output
- * is truly excessive when compared to the input. This is a strong indication
- * of a zip bomb.
+ * to the number of input bytes and keeps track of the XML nesting levels.
+ * An exception gets thrown if the output seems excessive compared to the
+ * input document. This is a strong indication of a zip bomb.
  *
  * @since Apache Tika 0.4
  * @see <a href="https://issues.apache.org/jira/browse/TIKA-216">TIKA-216</a>
@@ -48,6 +49,11 @@ public class SecureContentHandler extend
     private long characterCount = 0;
 
     /**
+     * The current XML element depth.
+     */
+    private int currentDepth = 0;
+
+    /**
      * Output threshold.
      */
     private long threshold = 1000000;
@@ -58,6 +64,11 @@ public class SecureContentHandler extend
     private long ratio = 100;
 
     /**
+     * Maximum XML element nesting level.
+     */
+    private int maxDepth = 30;
+
+    /**
      * Decorates the given content handler with zip bomb prevention based
      * on the count of bytes read from the given counting input stream.
      * The resulting decorator can be passed to a Tika parser along with
@@ -117,6 +128,26 @@ public class SecureContentHandler extend
     }
 
     /**
+     * Returns the XML element nesting limit currently in effect.
+     *
+     * @return maximum XML element nesting level
+     */
+    public int getMaximumDepth() {
+        return this.maxDepth;
+    }
+
+
+    /**
+     * Sets the maximum XML element nesting level. The nesting check made
+     * for each started element throws an exception based on this limit.
+     *
+     * @param depth maximum XML element nesting level
+     */
+    public void setMaximumDepth(int depth) {
+        maxDepth = depth;
+    }
+
+    /**
      * Converts the given {@link SAXException} to a corresponding
      * {@link TikaException} if it's caused by this instance detecting
      * a zip bomb.
@@ -156,11 +187,35 @@ public class SecureContentHandler extend
         long byteCount = getByteCount();
         if (characterCount > threshold
                 && characterCount > byteCount * ratio) {
-            throw new SecureSAXException(byteCount);
+            throw new SecureSAXException(
+                    "Suspected zip bomb: "
+                    + byteCount + " input bytes produced "
+                    + characterCount + " output characters");
         }
     }
 
     @Override
+    public void startElement(
+            String uri, String localName, String name, Attributes atts)
+            throws SAXException {
+        // Count the element being opened. Exactly maxDepth levels are still
+        // allowed; only nesting deeper than that is treated as a zip bomb.
+        currentDepth++;
+        if (currentDepth > maxDepth) {
+            throw new SecureSAXException("Suspected zip bomb: "
+                    + currentDepth + " levels of XML element nesting");
+        }
+        super.startElement(uri, localName, name, atts);
+    }
+
+    @Override
+    public void endElement(String uri, String localName, String name)
+            throws SAXException {
+        --currentDepth; // the element is being closed: one level less
+        super.endElement(uri, localName, name);
+    }
+
+    @Override
     public void characters(char[] ch, int start, int length)
             throws SAXException {
         advance(length);
@@ -181,10 +236,11 @@ public class SecureContentHandler extend
      */
     private class SecureSAXException extends SAXException {
 
-        public SecureSAXException(long byteCount) throws SAXException {
-            super("Suspected zip bomb: "
-                    + byteCount + " input bytes produced "
-                    + characterCount + " output characters");
+        /** Serial version UID.*/
+        private static final long serialVersionUID = 2285245380321771445L;
+
+        public SecureSAXException(String message) throws SAXException {
+            super(message);
         }
 
         public boolean isCausedBy(SecureContentHandler handler) {