You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/09/18 14:41:49 UTC

[tika] branch main updated: TIKA-3199 -- improve fuzzing of PDF streams; fix typos and improve documentation

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 4c4ef20  TIKA-3199 -- improve fuzzing of PDF streams; fix typos and improve documentation
4c4ef20 is described below

commit 4c4ef209ab78cc74b2b9c1779e0e1c8e06f754fb
Author: tballison <ta...@apache.org>
AuthorDate: Fri Sep 18 10:41:39 2020 -0400

    TIKA-3199 -- improve fuzzing of PDF streams; fix typos and improve documentation
---
 .../org/apache/tika/fuzzing/pdf/EvilCOSWriter.java | 22 ++++----
 .../tika/fuzzing/pdf/PDFTransformerConfig.java     | 59 +++++++++++++++++++---
 2 files changed, 63 insertions(+), 18 deletions(-)

diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/EvilCOSWriter.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/EvilCOSWriter.java
index 8a576a7..15e84a2 100644
--- a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/EvilCOSWriter.java
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/EvilCOSWriter.java
@@ -32,7 +32,6 @@ import org.apache.pdfbox.cos.COSStream;
 import org.apache.pdfbox.cos.COSString;
 import org.apache.pdfbox.cos.COSUpdateInfo;
 import org.apache.pdfbox.cos.ICOSVisitor;
-import org.apache.pdfbox.filter.DecodeResult;
 import org.apache.pdfbox.filter.Filter;
 import org.apache.pdfbox.filter.FilterFactory;
 import org.apache.pdfbox.io.IOUtils;
@@ -51,7 +50,6 @@ import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface
 import org.apache.pdfbox.util.Hex;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.fuzzing.Transformer;
-import org.apache.tika.fuzzing.general.GeneralTransformer;
 import org.apache.tika.io.IOExceptionWithCause;
 import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
@@ -192,7 +190,7 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
     // the current object number
     private long number = 0;
 
-    private int roughtNumberOfObjects = 0;
+    private int roughNumberOfObjects = 0;
 
     // maps the object to the keys generated in the writer
     // these are used for indirect references in other objects
@@ -406,7 +404,7 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
         COSDictionary root = trailer.getCOSDictionary(COSName.ROOT);
         COSDictionary info = trailer.getCOSDictionary(COSName.INFO);
         COSDictionary encrypt = trailer.getCOSDictionary(COSName.ENCRYPT);
-        roughtNumberOfObjects = doc.getObjects().size();
+        roughNumberOfObjects = doc.getObjects().size();
         if (root != null) {
             addObjectToWrite(root);
         }
@@ -476,7 +474,7 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
         // write the object
 
         long objectNumber = currentObjectKey.getNumber();
-        if (config.getRandomizeObjectNumbers() > -1.0f && random.nextFloat() <
+        if (config.getRandomizeObjectNumbers() > 0.0f && random.nextFloat() <
             config.getRandomizeObjectNumbers()) {
                 objectNumber = random.nextInt(((int)objectNumber)*2);
         }
@@ -506,16 +504,16 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
 
         COSObject cosObject = (COSObject)obj;
         COSBase underlyingObject = cosObject.getObject();
-        if (underlyingObject instanceof COSStream && config.getRawStreamTransformer() != null) {
+        if (underlyingObject instanceof COSStream && config.getUnfilteredStreamTransformer() != null) {
             COSStream cosStream = (COSStream)underlyingObject;
-            Transformer rawStreamTransformer = config.getRawStreamTransformer();
+            Transformer unfilteredStreamTransformer = config.getUnfilteredStreamTransformer();
             ByteArrayOutputStream bos = new ByteArrayOutputStream();
             try (InputStream is = cosStream.createRawInputStream()) {
                 IOUtils.copy(is, bos);
             }
             ByteArrayOutputStream transformed = new ByteArrayOutputStream();
             try {
-                rawStreamTransformer.transform(new ByteArrayInputStream(bos.toByteArray()), transformed);
+                unfilteredStreamTransformer.transform(new ByteArrayInputStream(bos.toByteArray()), transformed);
             } catch (TikaException e) {
                 throw new IOExceptionWithCause(e);
             }
@@ -609,10 +607,10 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
     }
 
     private TikaInputStream transformRawStream(TikaInputStream is) throws IOException {
-        if (config.getRawStreamTransformer() != null) {
+        if (config.getUnfilteredStreamTransformer() != null) {
             if (is.getLength() < 10000000) {
                 try (ByteArrayOutputStream bos = new ByteArrayOutputStream()) {
-                    config.getRawStreamTransformer().transform(is, bos);
+                    config.getUnfilteredStreamTransformer().transform(is, bos);
                     bos.flush();
                     bos.close();
                     return TikaInputStream.get(bos.toByteArray());
@@ -623,7 +621,7 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
                 TemporaryResources tmp = new TemporaryResources();
                 Path p = tmp.createTempFile();
                 try (OutputStream os = new BufferedOutputStream(Files.newOutputStream(p))) {
-                    config.getRawStreamTransformer().transform(is, os);
+                    config.getUnfilteredStreamTransformer().transform(is, os);
                     os.flush();
                 } catch (TikaException e) {
                     throw new IOExceptionWithCause(e);
@@ -1232,7 +1230,7 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
         float r = random.nextFloat();
         if (randomThreshold > 0.0f &&
                 r < randomThreshold) {
-            long num = random.nextInt(roughtNumberOfObjects);
+            long num = random.nextInt(roughNumberOfObjects);
             LOG.debug("corrupting ref number: "+key.getNumber() + " -> "+num);
             getStandardOutput().write(String.valueOf(num).getBytes(StandardCharsets.ISO_8859_1));
         } else {
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformerConfig.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformerConfig.java
index d864ef2..aa7729c 100644
--- a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformerConfig.java
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformerConfig.java
@@ -19,7 +19,6 @@ package org.apache.tika.fuzzing.pdf;
 import org.apache.pdfbox.cos.COSArray;
 import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSName;
-import org.apache.pdfbox.cos.COSObject;
 import org.apache.tika.fuzzing.Transformer;
 import org.apache.tika.fuzzing.general.ByteDeleter;
 import org.apache.tika.fuzzing.general.ByteFlipper;
@@ -53,7 +52,7 @@ public class PDFTransformerConfig {
             new ByteDeleter(),
             new ByteFlipper(), new ByteInjector(), new SpanSwapper(), new Truncator());
 
-    private Transformer rawStreamTransformer = new GeneralTransformer(1,
+    private Transformer unfilteredStreamTransformer = new GeneralTransformer(1,
             new ByteDeleter(),
             new ByteFlipper(), new ByteInjector(), new SpanSwapper(), new Truncator());
 
@@ -61,10 +60,20 @@ public class PDFTransformerConfig {
         return randomizeObjectNumbers;
     }
 
+    /**
+     *
+     * @param randomizeObjectNumbers probability that a given object number will be randomized.
+     *                               If &lt;= 0, object numbers are never randomized.
+     */
     public void setRandomizeObjectNumbers(float randomizeObjectNumbers) {
         this.randomizeObjectNumbers = randomizeObjectNumbers;
     }
 
+    /**
+     *
+     * @param randomizeRefNumbers probability that a given reference number will be randomized.
+     *                            If &lt;= 0, reference numbers are never randomized.
+     */
     public void setRandomizeRefNumbers(float randomizeRefNumbers) {
         this.randomizeRefNumbers = randomizeRefNumbers;
     }
@@ -73,26 +82,44 @@ public class PDFTransformerConfig {
         return randomizeRefNumbers;
     }
 
-    public Transformer getRawStreamTransformer() {
-        return rawStreamTransformer;
+    public Transformer getUnfilteredStreamTransformer() {
+        return unfilteredStreamTransformer;
     }
 
     public Transformer getStreamTransformer() {
         return streamTransformer;
     }
 
+    /**
+     * This transformer is applied to the stream _after_ each filter has been applied.
+     *
+     * @param transformer
+     */
     public void setStreamTransformer(Transformer transformer) {
         this.streamTransformer = transformer;
     }
 
-    public void setRawStreamTransformer(Transformer transformer) {
-        this.rawStreamTransformer = transformer;
+    /**
+     * This transformer is applied to the stream _before_ any filters
+     * are applied.
+     * @param transformer
+     */
+    public void setUnfilteredStreamTransformer(Transformer transformer) {
+        this.unfilteredStreamTransformer = transformer;
     }
 
+    /**
+     *
+     * @param maxFilters maximum number of filters to apply
+     */
     public void setMaxFilters(int maxFilters) {
         this.maxFilters = maxFilters;
     }
 
+    /**
+     * Which filters are allowed
+     * @return
+     */
     public Set<COSName> getAllowableFilters() {
         return allowableFilters;
     }
@@ -101,6 +128,14 @@ public class PDFTransformerConfig {
         this.allowableFilters = allowableFilters;
     }
 
+    /**
+     * If {@link #maxFilters} &gt; 0, this will randomly select filters given
+     * the {@link #maxFilters} and {@link #minFilters}.  If {@link #maxFilters} &lt; 0,
+     * this will return the existing filters.
+     *
+     * @param existingFilters
+     * @return
+     */
     public List<COSName> getFilters(COSBase existingFilters) {
         if (maxFilters < 0) {
             List<COSName> ret = new ArrayList<>();
@@ -132,6 +167,11 @@ public class PDFTransformerConfig {
         return filters;
     }
 
+    /**
+     * Minimum number of filters to apply to streams.
+     *
+     * @param minFilters
+     */
     public void setMinFilters(int minFilters) {
         this.minFilters = minFilters;
     }
@@ -140,6 +180,13 @@ public class PDFTransformerConfig {
         return maxFilteredStreamLength;
     }
 
+    /**
+     * Maximum filtered stream length.  AsciiHex doubles the size of the stream with
+     * each encoding.  This is used as a circuit breaker to stop adding filters
+     * if the stream goes above a given length.
+     *
+     * @param maxFilteredStreamLength
+     */
     public void setMaxFilteredStreamLength(long maxFilteredStreamLength) {
         this.maxFilteredStreamLength = maxFilteredStreamLength;
     }