You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2009/04/26 00:39:31 UTC

svn commit: r768618 - in /lucene/tika/trunk/src: main/java/org/apache/tika/sax/SecureContentHandler.java test/java/org/apache/tika/sax/SecureContentHandlerTest.java

Author: jukka
Date: Sat Apr 25 22:39:30 2009
New Revision: 768618

URL: http://svn.apache.org/viewvc?rev=768618&view=rev
Log:
TIKA-216: Zip bomb prevention

Add a SecureContentHandler class that can prevent simple zip bombs.

The class can be extended later on to cover more sophisticated attacks.

Added:
    lucene/tika/trunk/src/main/java/org/apache/tika/sax/SecureContentHandler.java   (with props)
    lucene/tika/trunk/src/test/java/org/apache/tika/sax/SecureContentHandlerTest.java   (with props)

Added: lucene/tika/trunk/src/main/java/org/apache/tika/sax/SecureContentHandler.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/sax/SecureContentHandler.java?rev=768618&view=auto
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/sax/SecureContentHandler.java (added)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/sax/SecureContentHandler.java Sat Apr 25 22:39:30 2009
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import org.apache.commons.io.input.CountingInputStream;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Content handler decorator that attempts to prevent denial of service
+ * attacks against Tika parsers.
+ * <p>
+ * Currently this class simply compares the number of output characters
+ * to the number of input bytes read so far, and throws a SAXException once
+ * the output is truly excessive when compared to the input. Such a ratio is
+ * a strong indication of a zip bomb.
+ *
+ * @since Apache Tika 0.4
+ * @see <a href="https://issues.apache.org/jira/browse/TIKA-216">TIKA-216</a>
+ */
+public class SecureContentHandler extends ContentHandlerDecorator {
+
+    /**
+     * Counting wrapper of the parsed input; supplies the bytes-read baseline.
+     */
+    private final CountingInputStream stream;
+
+    /**
+     * Total characters seen via characters() and ignorableWhitespace().
+     */
+    private long count;
+
+    /**
+     * Decorates the given content handler with zip bomb prevention based
+     * on the count of bytes read from the given counting input stream.
+     * Pass the resulting decorator to a Tika parser together with that
+     * same stream, so that the bytes the parser reads are the ones counted.
+     *
+     * @param handler the content handler to be decorated
+     * @param stream the input stream to be parsed, wrapped into
+     *        a {@link CountingInputStream} decorator
+     */
+    public SecureContentHandler(
+            ContentHandler handler, CountingInputStream stream) {
+        super(handler);
+        this.stream = stream;
+        this.count = 0;
+    }
+
+    /**
+     * Records the given number of output characters (or more accurately
+     * UTF-16 code units). Throws once the running total is both above
+     * 1000000 and more than 100 times the number of input bytes read.
+     *
+     * @param length number of new output characters produced
+     * @throws SAXException if a zip bomb is detected
+     */
+    private void advance(int length) throws SAXException {
+        count += length;
+        if (count > 1000000 && count > 100 * stream.getByteCount()) {
+            throw new SAXException("Zip Bomb detected!");
+        }
+    }
+
+    @Override
+    public void characters(char[] ch, int start, int length)
+            throws SAXException {
+        advance(length);
+        super.characters(ch, start, length);
+    }
+
+    @Override
+    public void ignorableWhitespace(char[] ch, int start, int length)
+            throws SAXException {
+        advance(length);
+        super.ignorableWhitespace(ch, start, length);
+    }
+
+}

Propchange: lucene/tika/trunk/src/main/java/org/apache/tika/sax/SecureContentHandler.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/tika/trunk/src/test/java/org/apache/tika/sax/SecureContentHandlerTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/sax/SecureContentHandlerTest.java?rev=768618&view=auto
==============================================================================
--- lucene/tika/trunk/src/test/java/org/apache/tika/sax/SecureContentHandlerTest.java (added)
+++ lucene/tika/trunk/src/test/java/org/apache/tika/sax/SecureContentHandlerTest.java Sat Apr 25 22:39:30 2009
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import java.io.IOException;
+
+import org.apache.commons.io.input.CountingInputStream;
+import org.apache.commons.io.input.NullInputStream;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests for the output/input ratio check in {@link SecureContentHandler}.
+ */
+public class SecureContentHandlerTest extends TestCase {
+
+    private static final int MANY_BYTES = 2000000;
+
+    private CountingInputStream stream;
+
+    private SecureContentHandler handler;
+
+    protected void setUp() {
+        stream = new CountingInputStream(new NullInputStream(MANY_BYTES));
+        handler = new SecureContentHandler(new DefaultHandler(), stream);
+    }
+
+    public void testZeroCharactersPerByte() throws IOException {
+        try {
+            char[] ch = new char[] { 'x' };
+            for (int i = 0; i < MANY_BYTES; i++) {
+                stream.read();
+            }
+            handler.characters(ch, 0, 1);
+        } catch (SAXException e) {
+            fail("Unexpected SAXException");
+        }
+    }
+
+    public void testOneCharacterPerByte() throws IOException {
+        try {
+            char[] ch = new char[1];
+            for (int i = 0; i < MANY_BYTES; i++) {
+                stream.read();
+                handler.characters(ch, 0, ch.length);
+            }
+        } catch (SAXException e) {
+            fail("Unexpected SAXException");
+        }
+    }
+
+    public void testTenCharactersPerByte() throws IOException {
+        try {
+            char[] ch = new char[10];
+            for (int i = 0; i < MANY_BYTES; i++) {
+                stream.read();
+                handler.characters(ch, 0, ch.length);
+            }
+        } catch (SAXException e) {
+            fail("Unexpected SAXException");
+        }
+    }
+
+    public void testManyCharactersPerByte() throws IOException {
+        try {
+            char[] ch = new char[1000];
+            for (int i = 0; i < MANY_BYTES; i++) {
+                stream.read();
+                handler.characters(ch, 0, ch.length);
+            }
+            fail("Expected SAXException not thrown");
+        } catch (SAXException e) {
+            // expected: 1000 characters per input byte is excessive
+        }
+    }
+
+    public void testSomeCharactersWithoutInput() throws IOException {
+        try {
+            char[] ch = new char[100];
+            for (int i = 0; i < 100; i++) {
+                handler.characters(ch, 0, ch.length);
+            }
+        } catch (SAXException e) {
+            fail("Unexpected SAXException");
+        }
+    }
+
+    public void testManyCharactersWithoutInput() throws IOException {
+        try {
+            char[] ch = new char[100];
+            for (int i = 0; i < 20000; i++) {
+                handler.characters(ch, 0, ch.length);
+            }
+            fail("Expected SAXException not thrown");
+        } catch (SAXException e) {
+            // expected: over a million characters with no input read
+        }
+    }
+
+}

Propchange: lucene/tika/trunk/src/test/java/org/apache/tika/sax/SecureContentHandlerTest.java
------------------------------------------------------------------------------
    svn:eol-style = native