You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/04/03 21:41:42 UTC

[tika] branch TIKA-3083 created (now bd56182)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch TIKA-3083
in repository https://gitbox.apache.org/repos/asf/tika.git.


      at bd56182  TIKA-3083 -- add fuzzing module

This branch includes the following new commits:

     new bd56182  TIKA-3083 -- add fuzzing module

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[tika] 01/01: TIKA-3083 -- add fuzzing module

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-3083
in repository https://gitbox.apache.org/repos/asf/tika.git

commit bd56182c548b027883ceb69d29a91a6aae3c081a
Author: tallison <ta...@apache.org>
AuthorDate: Fri Apr 3 17:41:18 2020 -0400

    TIKA-3083 -- add fuzzing module
---
 pom.xml                                            |    1 +
 tika-fuzzing/pom.xml                               |   59 +
 .../apache/tika/fuzzing/AutoDetectTransformer.java |   96 ++
 .../java/org/apache/tika/fuzzing/Transformer.java  |   41 +
 .../java/org/apache/tika/fuzzing/cli/FuzzOne.java  |  266 ++++
 .../org/apache/tika/fuzzing/cli/FuzzingCLI.java    |  240 ++++
 .../apache/tika/fuzzing/cli/FuzzingCLIConfig.java  |  160 +++
 .../tika/fuzzing/exceptions/CantFuzzException.java |   25 +
 .../apache/tika/fuzzing/general/ByteDeleter.java   |   53 +
 .../apache/tika/fuzzing/general/ByteFlipper.java   |   67 +
 .../apache/tika/fuzzing/general/ByteInjector.java  |   76 ++
 .../tika/fuzzing/general/GeneralTransformer.java   |   95 ++
 .../apache/tika/fuzzing/general/SpanSwapper.java   |   84 ++
 .../org/apache/tika/fuzzing/general/Truncator.java |   60 +
 .../org/apache/tika/fuzzing/pdf/EvilCOSWriter.java | 1283 ++++++++++++++++++++
 .../apache/tika/fuzzing/pdf/PDFTransformer.java    |   52 +
 .../tika/fuzzing/pdf/PDFTransformerConfig.java     |   26 +
 .../services/org.apache.tika.fuzzing.Transformer   |   17 +
 tika-fuzzing/src/main/resources/log4j.properties   |   24 +
 tika-fuzzing/src/test/java/TestFuzzingCLI.java     |   67 +
 tika-fuzzing/src/test/java/TestTransformer.java    |   49 +
 .../test/resources/test-documents/heavy_hang.xml   |   25 +
 .../test/resources/test-documents/null_pointer.xml |   25 +
 .../test/resources/test-documents/system_exit.xml  |   25 +
 24 files changed, 2916 insertions(+)

diff --git a/pom.xml b/pom.xml
index 89ee2e2..486c789 100644
--- a/pom.xml
+++ b/pom.xml
@@ -44,6 +44,7 @@
     <module>tika-batch</module>
     <module>tika-app</module>
     <module>tika-server</module>
+    <module>tika-fuzzing</module>
     <module>tika-translate</module>
     <module>tika-langdetect</module>
     <module>tika-example</module>
diff --git a/tika-fuzzing/pom.xml b/tika-fuzzing/pom.xml
new file mode 100644
index 0000000..19c89ed
--- /dev/null
+++ b/tika-fuzzing/pom.xml
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <!-- Maven descriptor for the tika-fuzzing module; inherits from tika-parent. -->
+    <parent>
+        <groupId>org.apache.tika</groupId>
+        <artifactId>tika-parent</artifactId>
+        <version>2.0.0-SNAPSHOT</version>
+        <relativePath>../tika-parent/pom.xml</relativePath>
+    </parent>
+
+    <artifactId>tika-fuzzing</artifactId>
+    <name>Apache Tika fuzzing</name>
+    <url>http://tika.apache.org/</url>
+
+    <modelVersion>4.0.0</modelVersion>
+
+
+    <dependencies>
+        <!-- command line parsing; NOTE(review): cli.version is not defined in this file,
+             presumably it comes from tika-parent (confirm) -->
+        <dependency>
+            <groupId>commons-cli</groupId>
+            <artifactId>commons-cli</artifactId>
+            <version>${cli.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.tika</groupId>
+            <artifactId>tika-parsers</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+        <!-- logging -->
+        <!-- NOTE(review): the slf4j and junit dependencies below declare no version;
+             presumably the versions are managed by tika-parent (confirm) -->
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-log4j12</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>jcl-over-slf4j</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>jul-to-slf4j</artifactId>
+        </dependency>
+        <!-- test -->
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <!-- bring in the mock parser -->
+        <dependency>
+            <groupId>${project.groupId}</groupId>
+            <artifactId>tika-core</artifactId>
+            <version>${project.version}</version>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+</project>
\ No newline at end of file
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/AutoDetectTransformer.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/AutoDetectTransformer.java
new file mode 100644
index 0000000..f27f4a0
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/AutoDetectTransformer.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing;
+
+import org.apache.tika.config.ServiceLoader;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.fuzzing.general.GeneralTransformer;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MediaTypeRegistry;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Transformer that detects the media type of the incoming stream and
+ * delegates to the transformer registered for that type. If no transformer
+ * is registered for the type or any of its supertypes, a
+ * {@link GeneralTransformer} is used.
+ */
+public class AutoDetectTransformer implements Transformer {
+
+    private static final ServiceLoader DEFAULT_LOADER =
+            new ServiceLoader(AutoDetectTransformer.class.getClassLoader());
+
+    TikaConfig config = TikaConfig.getDefaultConfig();
+    MediaTypeRegistry registry = config.getMediaTypeRegistry();
+    // reuse the config loaded above instead of building a second default config
+    Detector detector = config.getDetector();
+
+    Transformer fallback = new GeneralTransformer();
+    Map<MediaType, Transformer> transformerMap = new HashMap<>();
+
+    /**
+     * Builds the delegate table from the {@link Transformer} service
+     * providers found by the default loader.
+     */
+    public AutoDetectTransformer() {
+        this(DEFAULT_LOADER.loadServiceProviders(org.apache.tika.fuzzing.Transformer.class));
+    }
+
+    /**
+     * Builds the delegate table from the given transformers. When two
+     * transformers declare the same media type, the one later in the list wins.
+     *
+     * @param transformers transformers to register by their supported types
+     */
+    public AutoDetectTransformer(List<Transformer> transformers) {
+        for (Transformer t : transformers) {
+            for (MediaType mediaType : t.getSupportedTypes()) {
+                transformerMap.put(mediaType, t);
+            }
+        }
+    }
+
+    /**
+     * @return read-only view of the media types that have a dedicated transformer
+     */
+    @Override
+    public Set<MediaType> getSupportedTypes() {
+        // do not hand out the live key set: removing from it would alter the delegate table
+        return java.util.Collections.unmodifiableSet(transformerMap.keySet());
+    }
+
+    /**
+     * Detects the media type of {@code is} and hands the stream to the
+     * matching transformer. The stream is wrapped in a {@link TikaInputStream}
+     * that is closed when this method returns.
+     *
+     * @param is source document
+     * @param os destination for the transformed bytes
+     * @throws IOException on read/write failure
+     * @throws TikaException if the delegate transformer fails
+     */
+    @Override
+    public void transform(InputStream is, OutputStream os) throws IOException, TikaException {
+        try (TikaInputStream tis = TikaInputStream.get(is)) {
+            // Automatically detect the MIME type of the document
+            Metadata metadata = new Metadata();
+            MediaType type = detector.detect(tis, metadata);
+            Transformer transformer = getTransformer(type);
+            transformer.transform(tis, os);
+        }
+    }
+
+    /**
+     * Looks up the transformer for a media type, trying the normalised type
+     * first and then each supertype in turn.
+     *
+     * @param type detected media type, may be null
+     * @return the registered transformer, or the fallback if none matches
+     */
+    private Transformer getTransformer(MediaType type) {
+        if (type == null) {
+            return fallback;
+        }
+        // We always work on the normalised, canonical form
+        type = registry.normalize(type);
+
+        while (type != null) {
+            // Try finding a transformer for the type
+            Transformer transformer = transformerMap.get(type);
+            if (transformer != null) {
+                return transformer;
+            }
+
+            // Failing that, try for the parent of the type
+            type = registry.getSupertype(type);
+        }
+        return fallback;
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/Transformer.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/Transformer.java
new file mode 100644
index 0000000..7e3d083
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/Transformer.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Set;
+
+/**
+ * Contract for a component that reads a document from one stream and writes
+ * a transformed version of it to another.
+ */
+public interface Transformer {
+
+    /**
+     * Returns the set of media types supported by this transformer.
+     *
+     * @since Apache Tika 1.24.1
+     * @return set of media types; callers should treat it as read-only
+     */
+    Set<MediaType> getSupportedTypes();
+
+
+    /**
+     * Reads the document from {@code is} and writes the transformed
+     * result to {@code os}.
+     *
+     * @param is stream to read the source document from
+     * @param os stream to write the transformed document to
+     * @throws IOException declared for stream read/write failures
+     * @throws TikaException declared for transformation failures
+     */
+    void transform(InputStream is, OutputStream os) throws IOException, TikaException;
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzOne.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzOne.java
new file mode 100644
index 0000000..faa1383
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzOne.java
@@ -0,0 +1,266 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.cli;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.DefaultParser;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.fuzzing.AutoDetectTransformer;
+import org.apache.tika.fuzzing.Transformer;
+import org.apache.tika.fuzzing.exceptions.CantFuzzException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.utils.ExceptionUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
+/**
+ * Child process that runs against a single input file
+ */
+public class FuzzOne {
+    private static final Logger LOG = LoggerFactory.getLogger(FuzzOne.class);
+
+    /**
+     * Command line options understood by {@link #main(String[])}. All of
+     * them are required.
+     */
+    static Options OPTIONS;
+    static {
+        OPTIONS = new Options()
+                .addOption(Option.builder("i")
+                        .longOpt("inputFile")
+                        // parsed as a single path in FuzzOneConfig, not a directory
+                        .desc("input seed file")
+                        .hasArg(true)
+                        .required(true)
+                        .build())
+                .addOption(Option.builder("o")
+                        .longOpt("outputFile")
+                        .desc("output file base")
+                        .hasArg(true)
+                        .required(true)
+                        .build())
+                .addOption(Option.builder("m")
+                        .longOpt("timeoutMs")
+                        .desc("timeout in ms -- max time allowed to parse a file")
+                        .hasArg(true)
+                        .required(true)
+                        .build())
+                .addOption(Option.builder("n")
+                        .desc("thread id (thread number)")
+                        .hasArg(true)
+                        .required(true)
+                        .build())
+                .addOption(Option.builder("p")
+                        .longOpt("perFile")
+                        .desc("number of iterations to run per seed file")
+                        .hasArg(true)
+                        .required(true)
+                        .build())
+                .addOption(Option.builder("t")
+                        .longOpt("maxTransformers")
+                        .desc("maximum number of transformers to run per iteration")
+                        .hasArg(true)
+                        .required(true)
+                        .build())
+                .addOption(Option.builder("r")
+                        .longOpt("retryId")
+                        .desc("which retry is this")
+                        .hasArg(true)
+                        .required(true)
+                        .build());
+    }
+    Parser parser = new AutoDetectParser();
+
+    /**
+     * Entry point: parses the command line, then runs the fuzzing
+     * iterations for the single configured input file.
+     *
+     * @param args command line arguments, see {@code OPTIONS}
+     * @throws Exception if the command line cannot be parsed
+     */
+    public static void main(String[] args) throws Exception {
+        // parse first so a bad command line fails before FuzzOne is constructed
+        final FuzzOneConfig parsed = FuzzOneConfig.parse(args);
+        new FuzzOne().execute(parsed);
+    }
+
+    /**
+     * Runs {@code config.perFileIterations} fuzz iterations against the
+     * configured input file. I/O and Tika failures in one iteration are
+     * logged and the loop continues; a {@link CantFuzzException} stops the
+     * loop because further attempts on the same file are pointless.
+     *
+     * @param config parsed command line settings
+     */
+    private void execute(FuzzOneConfig config) {
+        Path src = config.inputFile;
+        Path targetDir = config.outputFileBase;
+        AutoDetectTransformer transformer = new AutoDetectTransformer();
+        for (int i = 0; i < config.perFileIterations; i++) {
+            try {
+                // suffix makes each output name unique per thread, retry and iteration
+                String ext = "-"+config.threadNum + "-" + config.retryNum + "-"+i;
+                fuzz(ext, src, targetDir, transformer, config.timeoutMs);
+            } catch (IOException e) {
+                LOG.warn("problem transforming file", e);
+            } catch (CantFuzzException e) {
+                LOG.warn("can't fuzz this file "+src, e);
+                return;
+            } catch (TikaException e) {
+                // route through the logger like the other failure paths
+                LOG.warn("tika exception while fuzzing "+src, e);
+            }
+        }
+    }
+
+    /**
+     * Writes one transformed copy of {@code src} next to
+     * {@code targetFileBase} and parses it on a separate thread with a
+     * time limit. On timeout an empty {@code .timeout} marker file is
+     * written and the JVM exits with status 1; a failed wait also exits
+     * with status 1.
+     *
+     * @param ext suffix appended to the target file name
+     * @param src seed file to transform
+     * @param targetFileBase base path of the output file
+     * @param transformer transformer used to create the fuzzed file
+     * @param timeoutMs maximum time allowed for the parse, in milliseconds
+     * @throws IOException if the transformation fails on I/O
+     * @throws TikaException if the transformer fails
+     */
+    private void fuzz(String ext, Path src, Path targetFileBase,
+                      Transformer transformer, long timeoutMs) throws IOException, TikaException {
+
+        Path target = targetFileBase.getParent().resolve(
+                targetFileBase.getFileName().toString() +ext);
+
+        try {
+            transformFile(transformer, src, target);
+        } catch (Throwable t) {
+            LOG.warn("failed to transform: " + src.toString());
+            try {
+                // the target may never have been created; a plain delete would
+                // throw NoSuchFileException and hide the real failure
+                Files.deleteIfExists(target);
+            } catch (IOException cleanupFailure) {
+                t.addSuppressed(cleanupFailure);
+            }
+            throw t;
+        }
+        ExecutorService executor = Executors.newSingleThreadExecutor();
+        Future<Integer> future = executor.submit(new ParseTask(target));
+
+        try {
+            int result = future.get(timeoutMs, TimeUnit.MILLISECONDS);
+            // a result of 1 means the parse task tried to delete the target
+            if (result == 1 && Files.exists(target)) {
+                LOG.warn("failed to delete target: "+target);
+            }
+        } catch (TimeoutException e) {
+            LOG.warn("timeout exception:"+target);
+            future.cancel(true);
+            writeErrFile(target, ".timeout");
+            System.exit(1);
+        } catch (InterruptedException|ExecutionException e) {
+            LOG.warn("problem parsing "+target, e);
+            System.exit(1);
+        } finally {
+            executor.shutdownNow();
+        }
+    }
+
+    private void writeErrFile(Path target, String ext) {
+        try {
+            Path err = target.getParent().resolve(target.getFileName().toString()+ext);
+            Files.write(err, new byte[0]);
+        } catch (IOException e) {
+            LOG.warn("things aren't going right today.", e);
+        }
+    }
+
+    /**
+     * Writes the stack trace of {@code t} to a {@code .stacktrace} file
+     * next to {@code target}. If the file cannot be written, both the
+     * write failure and the original throwable are logged.
+     *
+     * @param target fuzzed file that triggered the problem
+     * @param t throwable raised while parsing {@code target}
+     */
+    private void handleThrowable(Path target, Throwable t) {
+
+        try {
+            Path errMsg = target.getParent().resolve(target.getFileName().toString()+".stacktrace");
+            Files.write(errMsg, ExceptionUtils.getStackTrace(t).getBytes(StandardCharsets.UTF_8));
+        } catch (IOException e) {
+            // report why the stacktrace file could not be written...
+            LOG.warn("couldn't write stacktrace file for "+target, e);
+            // ...and keep the original problem visible in the log
+            LOG.warn("things aren't going right today.", t);
+        }
+
+    }
+
+    /**
+     * Streams {@code src} through {@code transformer} into {@code target}.
+     * Both streams are closed on exit.
+     *
+     * @param transformer transformer to apply
+     * @param src file to read
+     * @param target file to create or overwrite
+     * @throws IOException on read/write failure
+     * @throws TikaException if the transformer fails
+     */
+    private void transformFile(Transformer transformer, Path src, Path target) throws IOException, TikaException {
+        try (InputStream source = Files.newInputStream(src)) {
+            try (OutputStream sink = Files.newOutputStream(target)) {
+                transformer.transform(source, sink);
+            }
+        }
+    }
+
+    /**
+     * Settings for one FuzzOne run, populated from the command line.
+     */
+    private static class FuzzOneConfig {
+        /**
+         * Parses the command line against {@code OPTIONS}.
+         *
+         * @param args raw command line arguments
+         * @return populated configuration
+         * @throws ParseException if a required option is missing or malformed
+         * @throws NumberFormatException if a numeric option is not a number
+         */
+        static FuzzOneConfig parse(String[] args) throws ParseException {
+            CommandLineParser parser = new DefaultParser();
+            CommandLine commandLine = parser.parse(OPTIONS, args);
+            FuzzOneConfig config = new FuzzOneConfig();
+            config.inputFile = Paths.get(commandLine.getOptionValue("i"));
+            config.outputFileBase = Paths.get(commandLine.getOptionValue("o"));
+            config.perFileIterations = Integer.parseInt(commandLine.getOptionValue("p"));
+            config.maxTransformers = Integer.parseInt(commandLine.getOptionValue("t"));
+            config.threadNum = Integer.parseInt(commandLine.getOptionValue("n"));
+            config.retryNum = Integer.parseInt(commandLine.getOptionValue("r"));
+            // timeoutMs is a long and the parent process sends it via Long.toString
+            config.timeoutMs = Long.parseLong(commandLine.getOptionValue("m"));
+            return config;
+        }
+
+        // seed file to fuzz (-i)
+        private Path inputFile;
+        // base path for generated files (-o)
+        private Path outputFileBase;
+        // iterations to run against the seed file (-p)
+        int perFileIterations;
+        // parsed from -t
+        int maxTransformers;
+        // id of the parent worker thread (-n), used in output file names
+        int threadNum;
+        // retry counter from the parent (-r), used in output file names
+        int retryNum;
+        // parse timeout in milliseconds (-m)
+        long timeoutMs;
+
+    }
+
+    /**
+     * Parses one fuzzed file. A normal return, or any TikaException,
+     * SAXException or IOException, counts as an expected outcome and the
+     * file is deleted. Any other throwable is recorded through
+     * {@code handleThrowable} and the file is kept.
+     */
+    private class ParseTask implements Callable<Integer> {
+        private final Path target;
+        public ParseTask(Path target) {
+            this.target = target;
+        }
+
+        /**
+         * @return 1 if the parse ended in an expected way, 0 otherwise
+         * @throws Exception declared by {@link Callable}
+         */
+        @Override
+        public Integer call() throws Exception {
+            boolean expectedOutcome = false;
+            try (InputStream stream = Files.newInputStream(target)) {
+                LOG.debug("parsing "+target);
+                parser.parse(stream, new DefaultHandler(), new Metadata(), new ParseContext());
+                expectedOutcome = true;
+            } catch (TikaException | SAXException | IOException handled) {
+                // checked parser failures are not treated as findings, including
+                // TikaExceptions whose cause is a RuntimeException
+                expectedOutcome = true;
+            } catch (Throwable unexpected) {
+                handleThrowable(target, unexpected);
+            } finally {
+                if (!expectedOutcome) {
+                    LOG.info("FOUND PROBLEM: "+target);
+                } else {
+                    try {
+                        Files.delete(target);
+                    } catch (IOException deleteFailure) {
+                        LOG.warn("couldn't delete: "+target.toAbsolutePath());
+                    }
+                }
+            }
+            if (expectedOutcome) {
+                return 1;
+            }
+            return 0;
+        }
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLI.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLI.java
new file mode 100644
index 0000000..3857a9a
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLI.java
@@ -0,0 +1,240 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.cli;
+
+import org.apache.tika.utils.ProcessUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.file.FileVisitResult;
+import java.nio.file.FileVisitor;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorCompletionService;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+
+public class FuzzingCLI {
+    private static final Logger LOG = LoggerFactory.getLogger(FuzzingCLI.class);
+
+    private static final Path POISON = Paths.get("");
+
+    private int maxFiles = -1;
+
+    /**
+     * Entry point: parses the command line, checks that the input
+     * directory exists, creates the output directory and starts fuzzing.
+     *
+     * @param args command line arguments
+     * @throws Exception if parsing fails or the output directory cannot be created
+     * @throws IllegalArgumentException if the input directory does not exist
+     */
+    public static void main (String[] args) throws Exception {
+        final FuzzingCLIConfig cliConfig = FuzzingCLIConfig.parse(args);
+        if (0 == cliConfig.getMaxTransformers()) {
+            LOG.warn("max transformers == 0!");
+        }
+        final boolean inputDirPresent = Files.isDirectory(cliConfig.inputDir);
+        if (!inputDirPresent) {
+            throw new IllegalArgumentException("input directory doesn't exist: " + cliConfig.inputDir);
+        }
+        final FuzzingCLI cli = new FuzzingCLI();
+        Files.createDirectories(cliConfig.getOutputDirectory());
+        cli.execute(cliConfig);
+    }
+
+    private void execute(FuzzingCLIConfig config) {
+        ArrayBlockingQueue<Path> q = new ArrayBlockingQueue(10000);
+        ExecutorService executorService = Executors.newFixedThreadPool(config.getNumThreads()+1);
+        ExecutorCompletionService executorCompletionService = new ExecutorCompletionService(executorService);
+        FileAdder fileAdder = new FileAdder(config.getInputDirectory(), config.getNumThreads(), q);
+        executorCompletionService.submit(fileAdder);
+        for (int i = 0; i < config.numThreads; i++) {
+            executorCompletionService.submit(new Fuzzer(q, config));
+        }
+        int finished = 0;
+        while (finished < config.getNumThreads()+1) {
+            Future<Integer> future = null;
+            try {
+                future = executorCompletionService.poll(1, TimeUnit.SECONDS);
+                if (future != null) {
+                    future.get();
+                    finished++;
+                }
+            } catch (InterruptedException | ExecutionException e) {
+                e.printStackTrace();
+                break;
+            }
+        }
+        executorService.shutdownNow();
+    }
+
+    /**
+     * Worker that takes seed files off the shared queue and fuzzes each
+     * one in a child JVM running {@code FuzzOne}. Stops when it takes the
+     * POISON marker.
+     */
+    private static class Fuzzer implements Callable<Integer> {
+        static AtomicInteger COUNTER = new AtomicInteger();
+        // id passed to the child process, which uses it in output file names
+        private final int threadId = COUNTER.getAndIncrement();
+        private final ArrayBlockingQueue<Path> q;
+        private final FuzzingCLIConfig config;
+        public Fuzzer(ArrayBlockingQueue<Path> q, FuzzingCLIConfig config) {
+            this.q = q;
+            this.config = config;
+        }
+
+        /**
+         * Processes queue entries until the POISON marker is taken.
+         *
+         * @return 1 when the POISON marker was received
+         * @throws Exception InterruptedException if interrupted while waiting on the queue
+         */
+        @Override
+        public Integer call() throws Exception {
+            while (true) {
+                Path p = q.take();
+                if (p.equals(POISON)) {
+                    LOG.debug("Thread "+threadId + " stopping");
+                    return 1;
+                }
+                boolean success = false;
+                int tries = 0;
+                // do not spawn further child processes once this thread has been interrupted
+                while (! success && tries < config.getRetries()
+                        && ! Thread.currentThread().isInterrupted()) {
+                    if (tries > 0) {
+                        LOG.warn("Retrying ("+tries+") "+p);
+                    }
+                    success = fuzzIt(config, p, tries);
+                    tries++;
+                }
+            }
+        }
+
+        /**
+         * Runs {@code FuzzOne} on one seed file in a child JVM and waits
+         * for it to finish.
+         *
+         * @param config settings forwarded to the child process
+         * @param p seed file
+         * @param retryId zero-based retry counter, forwarded to the child
+         * @return true if the child exited in time with exit value 0
+         */
+        private boolean fuzzIt(FuzzingCLIConfig config, Path p, int retryId) {
+            //the target files should be flattened so that
+            //problematic files are all in one directory...may rethink this option later
+            Path target = config.getOutputDirectory().resolve(
+                    p.getFileName());
+            String cp = System.getProperty("java.class.path");
+
+            String[] args = new String[] {
+                    "java",
+                    "-ea",
+                    "-cp",
+                    ProcessUtils.escapeCommandLine(cp),
+                    "org.apache.tika.fuzzing.cli.FuzzOne",
+                    "-i",
+                    ProcessUtils.escapeCommandLine(p.toAbsolutePath().toString()),
+                    "-o",
+                    ProcessUtils.escapeCommandLine(target.toAbsolutePath().toString()),
+                    "-p",
+                    Integer.toString(config.getPerFileIterations()),
+                    "-t",
+                    Integer.toString(config.getMaxTransformers()),
+                    "-n",
+                    Integer.toString(threadId),
+                    "-r",
+                    Integer.toString(retryId),
+                    "-m",
+                    Long.toString(config.getTimeoutMs())
+            };
+            ProcessBuilder pb = new ProcessBuilder(args);
+            pb.inheritIO();
+            Process process;
+            try {
+                process = pb.start();
+            } catch (IOException e) {
+                LOG.warn("problem starting process", e);
+                // nothing to wait for; report failure instead of dereferencing a null process
+                return false;
+            }
+            boolean success = false;
+            try {
+                // NOTE(review): this adds an iteration count to a millisecond value;
+                // confirm the intended overall limit for the child process
+                long totalTime = 2*config.getTimeoutMs()+config.getPerFileIterations();
+                success = process.waitFor(totalTime, TimeUnit.MILLISECONDS);
+            } catch (InterruptedException e) {
+                LOG.warn("problem waiting for process to finish", e);
+                // restore the flag so the retry loop and the queue wait see the interrupt
+                Thread.currentThread().interrupt();
+            } finally {
+                if (process.isAlive()) {
+                    LOG.warn("process still alive for " + target.toAbsolutePath());
+                    process.destroyForcibly();
+                }
+                try {
+                    int exitValue = process.exitValue();
+                    if (exitValue != 0) {
+                        success = false;
+                        LOG.warn("bad exit value for " + target.toAbsolutePath());
+                    }
+                } catch (IllegalThreadStateException e) {
+                    // the process has not terminated yet, even after the destroy request
+                    success = false;
+                    LOG.warn("not exited");
+                    process.destroyForcibly();
+                }
+            }
+            return success;
+        }
+
+    }
+
+    /**
+     * Walks the input directory, puts every file on the shared queue and
+     * then enqueues one POISON marker per worker thread.
+     */
+    private class FileAdder implements Callable<Integer> {
+        private final Path inputDir;
+        private final int numThreads;
+        private final ArrayBlockingQueue<Path> queue;
+        // number of files successfully queued so far
+        private int added = 0;
+        public FileAdder(Path inputDirectory, int numThreads, ArrayBlockingQueue<Path> queue) {
+            this.inputDir = inputDirectory;
+            this.numThreads = numThreads;
+            this.queue = queue;
+        }
+
+        /**
+         * @return 1 once the walk is complete and all POISON markers are queued
+         * @throws Exception IOException from the walk, or InterruptedException
+         *         if interrupted while waiting for queue space
+         */
+        @Override
+        public Integer call() throws Exception {
+            Files.walkFileTree(inputDir, new DirWalker());
+            for (int i = 0; i < numThreads; i++) {
+                // put() waits for space; add() would throw IllegalStateException
+                // if the bounded queue is still full when the walk ends
+                queue.put(POISON);
+            }
+            return 1;
+        }
+
+        /**
+         * Visitor that offers every visited file to the queue and ignores
+         * directories and unreadable entries.
+         */
+        private class DirWalker implements FileVisitor<Path> {
+
+            @Override
+            public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException {
+                return FileVisitResult.CONTINUE;
+            }
+
+            @Override
+            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
+                // maxFiles of -1 means no limit
+                if (maxFiles > -1 && added >= maxFiles) {
+                    LOG.info("hit maxfiles; file crawler is stopping early");
+                    return FileVisitResult.TERMINATE;
+                }
+
+                try {
+                    boolean offered = queue.offer(file, 10, TimeUnit.MINUTES);
+                    if (offered) {
+                        added++;
+                        return FileVisitResult.CONTINUE;
+                    } else {
+                        LOG.error("couldn't add a file after 10 minutes!");
+                        return FileVisitResult.TERMINATE;
+                    }
+                } catch (InterruptedException e) {
+                    LOG.warn("interrupted while queueing "+file, e);
+                    // keep the interrupt visible to the enclosing task
+                    Thread.currentThread().interrupt();
+                    return FileVisitResult.TERMINATE;
+                }
+            }
+
+            @Override
+            public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException {
+                // skip entries that cannot be read and keep walking
+                return FileVisitResult.CONTINUE;
+            }
+
+            @Override
+            public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
+                return FileVisitResult.CONTINUE;
+            }
+        }
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLIConfig.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLIConfig.java
new file mode 100644
index 0000000..324b934
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLIConfig.java
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.cli;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.DefaultParser;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+/**
+ * Settings for the fuzzing command line, populated from the arguments by
+ * {@link #parse(String[])}. The input and output directories are required;
+ * every numeric option falls back to the default defined in this class.
+ */
+public class FuzzingCLIConfig {
+
+    private static final int DEFAULT_NUM_THREADS = 4;
+    private static final int DEFAULT_NUM_ITERATIONS = 1000;
+    //allow all transformers to operate
+    private static final int DEFAULT_MAX_TRANSFORMERS = -1;
+
+    private static final long DEFAULT_TIMEOUT_MS = 120000;
+
+    private static final int DEFAULT_RETRIES = 2;
+
+    static Options OPTIONS;
+    static {
+        OPTIONS = new Options()
+                .addOption(Option.builder("i")
+                        .longOpt("inputDir")
+                        .desc("input directory for seed files")
+                        .hasArg(true)
+                        .required(true)
+                        .build())
+                .addOption(Option.builder("o")
+                        .longOpt("outputDir")
+                        .desc("output directory for files that triggered problems")
+                        .hasArg(true)
+                        .required(true)
+                        .build())
+                .addOption(Option.builder("n")
+                        .longOpt("numThreads")
+                        .desc("number of threads")
+                        .hasArg(true)
+                        .required(false)
+                        .build())
+                .addOption(Option.builder("p")
+                        .longOpt("perFile")
+                        .desc("number of iterations to run per seed file")
+                        .hasArg(true)
+                        .required(false)
+                        .build())
+                .addOption(Option.builder("t")
+                        .longOpt("maxTransformers")
+                        .desc("maximum number of transformers to run per iteration")
+                        .hasArg(true)
+                        .required(false)
+                        .build())
+                .addOption(Option.builder("m")
+                        .longOpt("timeoutMs")
+                        .desc("timeout in ms -- max time allowed to parse a file")
+                        .hasArg(true)
+                        .required(false)
+                        .build())
+                .addOption(Option.builder("r")
+                        .longOpt("retries")
+                        .desc("number of times to retry a seed file if there's a catastrophic failure")
+                        .hasArg(true)
+                        .required(false)
+                        .build());
+
+    }
+
+    /**
+     * Builds a config from the command line arguments.
+     *
+     * @param args raw command line arguments
+     * @return the populated config
+     * @throws ParseException if a required option is missing, the command line
+     * is otherwise malformed, or a numeric option does not hold a valid number
+     */
+    public static FuzzingCLIConfig parse(String[] args) throws ParseException {
+        CommandLineParser parser = new DefaultParser();
+        CommandLine commandLine = parser.parse(OPTIONS, args);
+        FuzzingCLIConfig config = new FuzzingCLIConfig();
+        config.inputDir = Paths.get(commandLine.getOptionValue("i"));
+        config.outputDir = Paths.get(commandLine.getOptionValue("o"));
+        config.numThreads = intValue(commandLine, "n", DEFAULT_NUM_THREADS);
+        config.perFileIterations = intValue(commandLine, "p", DEFAULT_NUM_ITERATIONS);
+        config.maxTransformers = intValue(commandLine, "t", DEFAULT_MAX_TRANSFORMERS);
+        //the timeout is a long; parse it as one so that large values are not rejected
+        config.timeoutMS = longValue(commandLine, "m", DEFAULT_TIMEOUT_MS);
+        config.retries = intValue(commandLine, "r", DEFAULT_RETRIES);
+        return config;
+    }
+
+    //returns the int value of opt, or defaultValue if opt was not given
+    private static int intValue(CommandLine commandLine, String opt, int defaultValue)
+            throws ParseException {
+        if (!commandLine.hasOption(opt)) {
+            return defaultValue;
+        }
+        String value = commandLine.getOptionValue(opt);
+        try {
+            return Integer.parseInt(value);
+        } catch (NumberFormatException e) {
+            //report bad user input the same way as any other malformed argument
+            throw new ParseException("couldn't parse integer value for -" + opt + ": " + value);
+        }
+    }
+
+    //returns the long value of opt, or defaultValue if opt was not given
+    private static long longValue(CommandLine commandLine, String opt, long defaultValue)
+            throws ParseException {
+        if (!commandLine.hasOption(opt)) {
+            return defaultValue;
+        }
+        String value = commandLine.getOptionValue(opt);
+        try {
+            return Long.parseLong(value);
+        } catch (NumberFormatException e) {
+            throw new ParseException("couldn't parse long value for -" + opt + ": " + value);
+        }
+    }
+
+
+    int numThreads;
+    //number of variants tried per file
+    int perFileIterations;
+    //maxTransformers per file
+    int maxTransformers;
+
+    //max time allowed to process each file in milliseconds
+    long timeoutMS;
+
+    //times to retry a seed file after a catastrophic failure
+    int retries;
+    Path inputDir;
+    Path outputDir;
+
+
+    public int getNumThreads() {
+        return numThreads;
+    }
+
+    public Path getInputDirectory() {
+        return inputDir;
+    }
+
+    public Path getOutputDirectory() {
+        return outputDir;
+    }
+
+    public int getMaxTransformers() {
+        return maxTransformers;
+    }
+
+    public long getTimeoutMs() {
+        return timeoutMS;
+    }
+
+    public int getPerFileIterations() {
+        return perFileIterations;
+    }
+
+    public int getRetries() {
+        return retries;
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/exceptions/CantFuzzException.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/exceptions/CantFuzzException.java
new file mode 100644
index 0000000..3540822
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/exceptions/CantFuzzException.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.exceptions;
+
+import org.apache.tika.exception.TikaException;
+
+/**
+ * A {@link TikaException} carrying a message that explains why fuzzing
+ * could not be carried out.
+ */
+public class CantFuzzException extends TikaException {
+
+    /**
+     * @param message description of the problem, passed through to {@link TikaException}
+     */
+    public CantFuzzException(String message) {
+        super(message);
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteDeleter.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteDeleter.java
new file mode 100644
index 0000000..ff26f7f
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteDeleter.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.general;
+
+import org.apache.tika.fuzzing.Transformer;
+import org.apache.tika.mime.MediaType;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Collections;
+import java.util.Random;
+import java.util.Set;
+
+/**
+ * Transformer that copies the input to the output one byte at a time,
+ * leaving out each byte with probability {@code percentDeleted}.
+ */
+public class ByteDeleter implements Transformer {
+
+    static Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.OCTET_STREAM);
+
+    Random random = new Random();
+    float percentDeleted = 0.01f;
+
+    @Override
+    public Set<MediaType> getSupportedTypes() {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Streams {@code is} to {@code os}, drawing one random float per input byte
+     * to decide whether that byte is kept.
+     */
+    @Override
+    public void transform(InputStream is, OutputStream os) throws IOException {
+        for (int b = is.read(); b != -1; b = is.read()) {
+            boolean keep = random.nextFloat() >= percentDeleted;
+            if (keep) {
+                os.write(b);
+            }
+        }
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteFlipper.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteFlipper.java
new file mode 100644
index 0000000..74e9b5f
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteFlipper.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.general;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.tika.fuzzing.Transformer;
+import org.apache.tika.mime.MediaType;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Collections;
+import java.util.Random;
+import java.util.Set;
+
+/**
+ * Transformer that overwrites randomly chosen bytes of the input with random
+ * values. Each byte is replaced with probability {@code percentCorrupt}, and
+ * one randomly chosen position is always replaced so that even short,
+ * non-empty inputs are touched. Empty input produces no output.
+ */
+public class ByteFlipper implements Transformer {
+
+    //TODO add something about protecting first x bytes?
+    private Random random = new Random();
+    private float percentCorrupt = 0.01f;
+
+    static Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.OCTET_STREAM);
+
+    @Override
+    public Set<MediaType> getSupportedTypes() {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Reads all of {@code is} into memory and writes it to {@code os} with
+     * some bytes replaced by random ones.
+     */
+    @Override
+    public void transform(InputStream is, OutputStream os) throws IOException {
+        //TODO -- don't load the full thing into memory
+        byte[] input = IOUtils.toByteArray(is);
+        if (input.length == 0) {
+            return;
+        }
+        byte[] singleByte = new byte[1];
+        //make sure that there's at least one change, even in short files
+        int atLeastOneIndex = random.nextInt(input.length);
+
+        for (int i = 0; i < input.length; i++) {
+            if (random.nextFloat() <= percentCorrupt || i == atLeastOneIndex) {
+                random.nextBytes(singleByte);
+                os.write(singleByte[0]);
+            } else {
+                os.write(input[i]);
+            }
+        }
+    }
+
+    /**
+     * @param percentCorrupt probability, compared against a random float in
+     *                       [0, 1), that any given byte is replaced
+     */
+    public void setPercentCorrupt(float percentCorrupt) {
+        //"this." is required: without it the parameter was assigned to itself
+        //and the field silently kept its old value
+        this.percentCorrupt = percentCorrupt;
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteInjector.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteInjector.java
new file mode 100644
index 0000000..2dbfec8
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteInjector.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.general;
+
+import org.apache.tika.fuzzing.Transformer;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.mime.MediaType;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Random;
+import java.util.Set;
+
+/**
+ * Transformer that inserts spans of random bytes into the input. The number
+ * of insertions is {@code injectionFrequency * input length}, rounded down,
+ * with a minimum of one; each inserted span is between 1 and {@code maxSpan}
+ * bytes long. Empty input produces no output.
+ */
+public class ByteInjector implements Transformer {
+    Random random = new Random();
+    float injectionFrequency = 0.01f;
+    int maxSpan = 100;
+    static Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.OCTET_STREAM);
+
+    @Override
+    public Set<MediaType> getSupportedTypes() {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Reads all of {@code is} into memory and writes it to {@code os},
+     * inserting random spans after randomly chosen input bytes.
+     */
+    @Override
+    public void transform(InputStream is, OutputStream os) throws IOException {
+        //TODO -- don't load the full thing into memory
+        byte[] input = IOUtils.toByteArray(is);
+        if (input.length == 0) {
+            return;
+        }
+        int numInjections = (int) Math.floor((double)injectionFrequency*(double)input.length);
+        //at least one injection
+        numInjections = Math.max(1, numInjections);
+        int[] starts = new int[numInjections];
+        if (numInjections > 1) {
+            for (int i = 0; i < numInjections; i++) {
+                //the bound is exclusive, so input.length covers every valid index
+                //and can never be zero here
+                starts[i] = random.nextInt(input.length);
+            }
+        } else {
+            starts[0] = 0;
+        }
+        Arrays.sort(starts);
+        int startIndex = 0;
+
+        for (int i = 0; i < input.length; i++) {
+            os.write(input[i]);
+            //the random offsets can repeat; consume every entry for this position,
+            //otherwise a duplicate would leave startIndex pointing at an offset
+            //that has already passed and all later injections would be skipped
+            while (startIndex < starts.length && starts[startIndex] == i) {
+                inject(os);
+                startIndex++;
+            }
+        }
+    }
+
+    //writes between 1 and maxSpan random bytes to os
+    private void inject(OutputStream os) throws IOException {
+        //1 + ... so that an injection is never an empty no-op
+        int len = 1 + random.nextInt(maxSpan);
+        byte[] randBytes = new byte[len];
+        random.nextBytes(randBytes);
+        os.write(randBytes);
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/GeneralTransformer.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/GeneralTransformer.java
new file mode 100644
index 0000000..803784e
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/GeneralTransformer.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.general;
+
+import org.apache.commons.compress.utils.IOUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.fuzzing.Transformer;
+import org.apache.tika.mime.MediaType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+
+/**
+ * Transformer that chains a random selection of other transformers. Each call
+ * to {@link #transform(InputStream, OutputStream)} applies between 1 and
+ * {@code maxTransforms} randomly chosen transformers (repeats allowed), each
+ * one operating on the output of the previous one.
+ */
+public class GeneralTransformer implements Transformer {
+
+    private static final Logger LOG = LoggerFactory.getLogger(GeneralTransformer.class);
+
+    Random random = new Random();
+
+    private final int maxTransforms;
+    private final Transformer[] transformers;
+    private final Set<MediaType> supportedTypes;
+
+    /**
+     * Chains the byte-level transformers of this package.
+     */
+    public GeneralTransformer() {
+        this(new ByteDeleter(), new ByteFlipper(),
+                new ByteInjector(), new Truncator(), new SpanSwapper());
+    }
+
+    /**
+     * @param transformers transformers to choose from; at most as many
+     *                     transforms as there are transformers are applied per call
+     */
+    public GeneralTransformer(Transformer ... transformers) {
+        this(transformers.length, transformers);
+    }
+
+    /**
+     * @param maxTransforms maximum number of transforms applied per call; a negative
+     *                      value means "as many as there are transformers", and 0
+     *                      makes {@link #transform(InputStream, OutputStream)} a no-op
+     * @param transformers  transformers to choose from
+     */
+    public GeneralTransformer(int maxTransforms, Transformer ... transformers) {
+        this.maxTransforms = (maxTransforms < 0) ? transformers.length : maxTransforms;
+        this.transformers = transformers;
+        Set<MediaType> tmpTypes = new HashSet<>();
+        for (Transformer transformer : transformers) {
+            tmpTypes.addAll(transformer.getSupportedTypes());
+        }
+        supportedTypes = Collections.unmodifiableSet(tmpTypes);
+    }
+
+    /**
+     * @return the union of the types supported by the wrapped transformers
+     */
+    @Override
+    public Set<MediaType> getSupportedTypes() {
+        return supportedTypes;
+    }
+
+    /**
+     * Buffers {@code is} in memory, runs it through a random chain of
+     * transformers and writes the final result to {@code os}. Nothing is
+     * written when {@code maxTransforms} is 0.
+     */
+    @Override
+    public void transform(InputStream is, OutputStream os) throws IOException, TikaException {
+        //used for debugging
+        if (maxTransforms == 0) {
+            return;
+        }
+        //TODO -- make this actually streaming
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        IOUtils.copy(is, bos);
+        if (transformers.length == 0) {
+            //nothing to choose from; pass the input through untouched
+            bos.writeTo(os);
+            return;
+        }
+        int transformerCount = 1 + random.nextInt(maxTransforms);
+        //apply every selected transform -- stopping one short would mean
+        //that a count of 1 never modifies the input at all
+        for (int i = 0; i < transformerCount; i++) {
+            //pick from the full set of transformers, not from the first
+            //transformerCount of them; this also keeps the index in range
+            //when maxTransforms is larger than the number of transformers
+            Transformer transformer = transformers[random.nextInt(transformers.length)];
+            byte[] bytes = bos.toByteArray();
+            bos = new ByteArrayOutputStream();
+            transformer.transform(new ByteArrayInputStream(bytes), bos);
+            bos.flush();
+            if (bos.size() == 0) {
+                LOG.warn("zero length: "+transformer);
+            }
+        }
+        bos.writeTo(os);
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/SpanSwapper.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/SpanSwapper.java
new file mode 100644
index 0000000..e2bc16c
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/SpanSwapper.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.general;
+
+import org.apache.tika.fuzzing.Transformer;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.mime.MediaType;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Collections;
+import java.util.Random;
+import java.util.Set;
+
+/**
+ * randomly swaps spans from the input
+ *
+ */
+public class SpanSwapper implements Transformer {
+
+    static Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.OCTET_STREAM);
+
+    Random random = new Random();
+    private float swapProbability = 0.01f;
+    int maxSpanLength = 10000;
+
+    @Override
+    public Set<MediaType> getSupportedTypes() {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Reads all of {@code is} into memory, exchanges
+     * {@code swapProbability * input length} (rounded down, minimum one)
+     * pairs of spans and writes the result to {@code os}.
+     */
+    @Override
+    public void transform(InputStream is, OutputStream os) throws IOException {
+        byte[] original = IOUtils.toByteArray(is);
+        int swapCount = (int) Math.floor(swapProbability*original.length);
+        //at least one swap
+        if (swapCount == 0) {
+            swapCount = 1;
+        }
+        //work on a copy of what was read
+        byte[] swapped = original.clone();
+        int done = 0;
+        while (done < swapCount) {
+            swapped = swap(swapped);
+            done++;
+        }
+        os.write(swapped);
+    }
+
+    /**
+     * Exchanges two equally long spans of {@code ret} in place. Both start
+     * offsets and the span length are random; the length is capped so that
+     * neither span runs past the end of the array. The spans may overlap.
+     *
+     * @return {@code ret}, or an empty array if {@code ret} is empty
+     */
+    private byte[] swap(byte[] ret) {
+        if (ret.length == 0) {
+            return new byte[0];
+        }
+        int from = random.nextInt(ret.length);
+        int to = random.nextInt(ret.length);
+        int requested = random.nextInt(maxSpanLength);
+
+        //room left behind whichever span starts later
+        int room = ret.length - Math.max(from, to);
+        int len = Math.min(requested, room);
+
+        //save the bytes that are about to be overwritten
+        byte[] displaced = new byte[len];
+        System.arraycopy(ret, to, displaced, 0, len);
+        //move the source span onto the target...
+        System.arraycopy(ret, from, ret, to, len);
+        //...and put the saved target bytes where the source was
+        System.arraycopy(displaced, 0, ret, from, len);
+        return ret;
+    }
+
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/Truncator.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/Truncator.java
new file mode 100644
index 0000000..209810c
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/Truncator.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.general;
+
+import org.apache.tika.fuzzing.Transformer;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.mime.MediaType;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Collections;
+import java.util.Random;
+import java.util.Set;
+
+/**
+ * Transformer that writes only a randomly sized prefix of the input.
+ * Empty input produces no output.
+ */
+public class Truncator implements Transformer {
+
+    static Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.OCTET_STREAM);
+    Random random = new Random();
+
+    @Override
+    public Set<MediaType> getSupportedTypes() {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Reads all of {@code is} into memory and writes its first {@code keep}
+     * bytes to {@code os}, where {@code keep} is a random value between 1 and
+     * the input length; a draw equal to the full length is replaced by
+     * length - 2 (never below 0), so the output is always shorter than the input.
+     */
+    @Override
+    public void transform(InputStream is, OutputStream os) throws IOException {
+        //TODO -- redo streaming
+        byte[] input = IOUtils.toByteArray(is);
+        if (input.length == 0) {
+            return;
+        }
+        int keep = 1 + random.nextInt(input.length);
+        if (keep >= input.length) {
+            //the draw would keep everything; cut two bytes instead, flooring at zero
+            keep = Math.max(0, input.length-2);
+        }
+        os.write(input, 0, keep);
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/EvilCOSWriter.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/EvilCOSWriter.java
new file mode 100644
index 0000000..0484c93
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/EvilCOSWriter.java
@@ -0,0 +1,1283 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.pdf;
+
+import org.apache.pdfbox.cos.COSArray;
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSBoolean;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSDocument;
+import org.apache.pdfbox.cos.COSFloat;
+import org.apache.pdfbox.cos.COSInteger;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.cos.COSNull;
+import org.apache.pdfbox.cos.COSNumber;
+import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.cos.COSObjectKey;
+import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.cos.COSString;
+import org.apache.pdfbox.cos.COSUpdateInfo;
+import org.apache.pdfbox.cos.ICOSVisitor;
+import org.apache.pdfbox.io.IOUtils;
+import org.apache.pdfbox.io.RandomAccessInputStream;
+import org.apache.pdfbox.io.RandomAccessRead;
+import org.apache.pdfbox.pdfparser.PDFXRefStream;
+import org.apache.pdfbox.pdfwriter.COSStandardOutputStream;
+import org.apache.pdfbox.pdfwriter.COSWriter;
+import org.apache.pdfbox.pdfwriter.COSWriterXRefEntry;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.encryption.SecurityHandler;
+import org.apache.pdfbox.pdmodel.fdf.FDFDocument;
+import org.apache.pdfbox.pdmodel.interactive.digitalsignature.COSFilterInputStream;
+import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface;
+import org.apache.pdfbox.util.Hex;
+
+import java.io.ByteArrayOutputStream;
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.SequenceInputStream;
+import java.nio.charset.StandardCharsets;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.text.DecimalFormat;
+import java.text.DecimalFormatSymbols;
+import java.text.NumberFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Deque;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+
+public class EvilCOSWriter implements ICOSVisitor, Closeable {
+    /**
+     * The dictionary open token.
+     */
+    public static final byte[] DICT_OPEN = "<<".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The dictionary close token.
+     */
+    public static final byte[] DICT_CLOSE = ">>".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * space character.
+     */
+    public static final byte[] SPACE = {' '};
+    /**
+     * The start to a PDF comment.
+     */
+    public static final byte[] COMMENT = {'%'};
+
+    /**
+     * The output version of the PDF.
+     */
+    public static final byte[] VERSION = "PDF-1.4".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * Garbage bytes used to create the PDF header.
+     */
+    public static final byte[] GARBAGE = new byte[]{(byte) 0xf6, (byte) 0xe4, (byte) 0xfc, (byte) 0xdf};
+    /**
+     * The EOF constant.
+     */
+    public static final byte[] EOF = "%%EOF".getBytes(StandardCharsets.US_ASCII);
+    // pdf tokens
+
+    /**
+     * The reference token.
+     */
+    public static final byte[] REFERENCE = "R".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The XREF token.
+     */
+    public static final byte[] XREF = "xref".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The xref free token.
+     */
+    public static final byte[] XREF_FREE = "f".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The xref used token.
+     */
+    public static final byte[] XREF_USED = "n".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The trailer token.
+     */
+    public static final byte[] TRAILER = "trailer".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The start xref token.
+     */
+    public static final byte[] STARTXREF = "startxref".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The starting object token.
+     */
+    public static final byte[] OBJ = "obj".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The end object token.
+     */
+    public static final byte[] ENDOBJ = "endobj".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The array open token.
+     */
+    public static final byte[] ARRAY_OPEN = "[".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The array close token.
+     */
+    public static final byte[] ARRAY_CLOSE = "]".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The open stream token.
+     */
+    public static final byte[] STREAM = "stream".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The close stream token.
+     */
+    public static final byte[] ENDSTREAM = "endstream".getBytes(StandardCharsets.US_ASCII);
+
+    // the decimal format for xref byte offsets: ten digits, zero padded
+    private final NumberFormat formatXrefOffset = new DecimalFormat("0000000000",
+            DecimalFormatSymbols.getInstance(Locale.US));
+
+    // the decimal format for the xref object generation number data: five digits, zero padded
+    private final NumberFormat formatXrefGeneration = new DecimalFormat("00000",
+            DecimalFormatSymbols.getInstance(Locale.US));
+
+    // the stream where we create the pdf output
+    private OutputStream output;
+
+    // the stream used to write standard cos data; wraps 'output' (see the constructor)
+    private COSStandardOutputStream standardOutput;
+
+    // the start position of the x ref section
+    private long startxref = 0;
+
+    // the current object number, i.e. the highest number handed out so far
+    private long number = 0;
+
+    // maps the object to the keys generated in the writer
+    // these are used for indirect references in other objects
+    // A Hashtable is used on purpose over a HashMap
+    // so that null entries will not get added.
+    @SuppressWarnings({"squid:S1149"})
+    private final Map<COSBase, COSObjectKey> objectKeys = new Hashtable<>();
+
+    // reverse lookup of objectKeys: key -> object (filled in prepareIncrement)
+    private final Map<COSObjectKey, COSBase> keyObject = new HashMap<>();
+
+    // the list of x ref entries to be made so far
+    private final List<COSWriterXRefEntry> xRefEntries = new ArrayList<>();
+    // mirrors objectsToWrite so that membership can be tested without scanning the deque
+    private final Set<COSBase> objectsToWriteSet = new HashSet<>();
+
+    // the queue of objects still to be written, in insertion order
+    private final Deque<COSBase> objectsToWrite = new LinkedList<>();
+
+    // the objects already written
+    private final Set<COSBase> writtenObjects = new HashSet<>();
+
+    //An 'actual' is any COSBase that is not a COSObject.
+    //need to keep a list of the actuals that are added
+    //as well as the objects because there is a problem
+    //when adding a COSObject and then later adding
+    //the actual for that object, so we will track
+    //actuals separately.
+    private final Set<COSBase> actualsAdded = new HashSet<>();
+
+    // key of the object currently being written (set in doWriteObject)
+    private COSObjectKey currentObjectKey = null;
+    private PDDocument pdDocument = null;
+    // when non-null the header is written as %FDF- instead of %PDF- (see doWriteHeader)
+    private FDFDocument fdfDocument = null;
+    private boolean willEncrypt = false;
+
+    // signing
+    private boolean incrementalUpdate = false;
+    // true while a /Sig or /DocTimeStamp dictionary is being written
+    private boolean reachedSignature = false;
+    // output position and length of the signature /Contents value
+    private long signatureOffset;
+    private long signatureLength;
+    // output position and length of the /ByteRange array content
+    private long byteRangeOffset;
+    private long byteRangeLength;
+    // the existing document an incremental update is appended to
+    private RandomAccessRead incrementalInput;
+    // destination for the existing document plus the incremental part
+    private OutputStream incrementalOutput;
+    private SignatureInterface signatureInterface;
+    // buffered bytes of the incremental update; set to null once the signature has been written
+    private byte[] incrementPart;
+    private COSArray byteRangeArray;
+
+    // fuzzing options, consulted while objects are written
+    private final PDFTransformerConfig config;
+    // source of randomness for fuzzing decisions; not explicitly seeded
+    private final Random random = new Random();
+    /**
+     * Creates a writer that serializes to the given stream and applies the fuzzing
+     * options found in {@code config}.
+     *
+     * @param outputStream The output stream to write the PDF. It will be closed when this object is
+     *                     closed.
+     * @param config       The fuzzing options consulted while objects are written.
+     */
+    public EvilCOSWriter(OutputStream outputStream, PDFTransformerConfig config) {
+        this.config = config;
+        this.output = outputStream;
+        this.standardOutput = new COSStandardOutputStream(outputStream);
+    }
+
+    /**
+     * Seeds the key maps and the object counter from an existing document so that an
+     * incremental update reuses the object keys already present in its xref table.
+     *
+     * @param doc the document to read the xref table from; {@code null} is a no-op
+     * @throws IOException if an object cannot be fetched from the document's pool
+     */
+    private void prepareIncrement(PDDocument doc)  throws IOException {
+        if (doc == null) {
+            return;
+        }
+        COSDocument cosDoc = doc.getDocument();
+        Map<COSObjectKey, Long> xrefTable = cosDoc.getXrefTable();
+        Set<COSObjectKey> knownKeys = xrefTable.keySet();
+        long maxNumber = cosDoc.getHighestXRefObjectNumber();
+
+        for (COSObjectKey key : knownKeys) {
+            COSBase pooled = cosDoc.getObjectFromPool(key).getObject();
+            if (key == null) {
+                continue;
+            }
+            // numbers are not registered in the key maps
+            if (pooled != null && !(pooled instanceof COSNumber)) {
+                objectKeys.put(pooled, key);
+                keyObject.put(key, pooled);
+            }
+            maxNumber = Math.max(maxNumber, key.getNumber());
+        }
+        setNumber(maxNumber);
+    }
+
+    /**
+     * Appends an entry to the list of xref entries that is written out later.
+     *
+     * @param entry The new entry to add.
+     */
+    protected void addXRefEntry(COSWriterXRefEntry entry) {
+        List<COSWriterXRefEntry> pendingEntries = getXRefEntries();
+        pendingEntries.add(entry);
+    }
+
+    /**
+     * This will close the stream.
+     *
+     * @throws IOException If the underlying stream throws an exception.
+     */
+    @Override
+    public void close() throws IOException {
+        if (getStandardOutput() != null) {
+            getStandardOutput().close();
+        }
+        if (incrementalOutput != null) {
+            incrementalOutput.close();
+        }
+    }
+
+    /**
+     * Returns the object number counter of this writer.
+     *
+     * @return The current object number.
+     */
+    protected long getNumber() {
+        return this.number;
+    }
+
+    /**
+     * Returns the live map from objects to the keys assigned by this writer.
+     * The map itself is returned, not a copy.
+     *
+     * @return A map of all object keys.
+     */
+    public Map<COSBase, COSObjectKey> getObjectKeys() {
+        return this.objectKeys;
+    }
+
+    /**
+     * Returns the raw stream the PDF output is created on.
+     *
+     * @return The output stream.
+     */
+    protected OutputStream getOutput() {
+        return this.output;
+    }
+
+    /**
+     * Returns the stream used to write standard COS data.
+     *
+     * @return The standard output stream.
+     */
+    protected COSStandardOutputStream getStandardOutput() {
+        return this.standardOutput;
+    }
+
+    /**
+     * Returns the recorded start position of the xref section.
+     *
+     * @return The current start xref.
+     */
+    protected long getStartxref() {
+        return this.startxref;
+    }
+
+    /**
+     * Returns the live list of xref entries collected so far.
+     * The list itself is returned, not a copy.
+     *
+     * @return All available xref entries.
+     */
+    protected List<COSWriterXRefEntry> getXRefEntries() {
+        return this.xRefEntries;
+    }
+
+    /**
+     * Replaces the object number counter of this writer.
+     *
+     * @param newNumber The new object number.
+     */
+    protected void setNumber(long newNumber) {
+        this.number = newNumber;
+    }
+
+    /**
+     * Replaces the raw stream the PDF output is created on.
+     *
+     * @param newOutput The new output stream.
+     */
+    private void setOutput(OutputStream newOutput) {
+        this.output = newOutput;
+    }
+
+    /**
+     * Replaces the stream used to write standard COS data.
+     *
+     * @param newStandardOutput The new standard output stream.
+     */
+    private void setStandardOutput(COSStandardOutputStream newStandardOutput) {
+        this.standardOutput = newStandardOutput;
+    }
+
+    /**
+     * Records the start position of the xref section.
+     *
+     * @param newStartxref The new start xref attribute.
+     */
+    protected void setStartxref(long newStartxref) {
+        this.startxref = newStartxref;
+    }
+
+    /**
+     * Writes the body of the document in two passes: first the /Root and /Info
+     * dictionaries of the trailer together with everything they queue, then the
+     * /Encrypt dictionary.
+     *
+     * @param doc The document to write the body for.
+     * @throws IOException If there is an error writing the data.
+     */
+    protected void doWriteBody(COSDocument doc) throws IOException {
+        COSDictionary trailerDict = doc.getTrailer();
+        COSDictionary catalog = trailerDict.getCOSDictionary(COSName.ROOT);
+        COSDictionary docInfo = trailerDict.getCOSDictionary(COSName.INFO);
+        COSDictionary encryptDict = trailerDict.getCOSDictionary(COSName.ENCRYPT);
+
+        // first pass: catalog and document information
+        if (catalog != null) {
+            addObjectToWrite(catalog);
+        }
+        if (docInfo != null) {
+            addObjectToWrite(docInfo);
+        }
+        doWriteObjects();
+
+        // second pass: the encryption dictionary, written with willEncrypt switched off
+        willEncrypt = false;
+        if (encryptDict != null) {
+            addObjectToWrite(encryptDict);
+        }
+        doWriteObjects();
+    }
+
+    /**
+     * Drains the write queue, writing each object in FIFO order. Objects queued
+     * while another object is being written are picked up by the same loop.
+     *
+     * @throws IOException if an object cannot be written
+     */
+    private void doWriteObjects() throws IOException {
+        while (!objectsToWrite.isEmpty()) {
+            COSBase pending = objectsToWrite.removeFirst();
+            objectsToWriteSet.remove(pending);
+            doWriteObject(pending);
+        }
+    }
+
+    /**
+     * Queues an object for writing unless it was already written, is already queued,
+     * or is a known object for which neither it nor its registered counterpart is
+     * flagged as needing an update.
+     *
+     * @param object the object (possibly an indirect {@code COSObject}) to queue
+     */
+    private void addObjectToWrite(COSBase object) {
+        // resolve an indirect reference to the object it points to
+        COSBase resolved = (object instanceof COSObject) ? ((COSObject) object).getObject() : object;
+
+        boolean alreadyHandled = writtenObjects.contains(object)
+                || objectsToWriteSet.contains(object)
+                || actualsAdded.contains(resolved);
+        if (alreadyHandled) {
+            return;
+        }
+
+        COSObjectKey knownKey = (resolved == null) ? null : objectKeys.get(resolved);
+        COSBase knownObject = (knownKey == null) ? null : keyObject.get(knownKey);
+
+        // a known object is skipped when nothing asks for it to be rewritten
+        if (resolved != null && objectKeys.containsKey(resolved)
+                && object instanceof COSUpdateInfo && !((COSUpdateInfo) object).isNeedToBeUpdated()
+                && knownObject instanceof COSUpdateInfo && !((COSUpdateInfo) knownObject).isNeedToBeUpdated()) {
+            return;
+        }
+
+        objectsToWrite.add(object);
+        objectsToWriteSet.add(object);
+        if (resolved != null) {
+            actualsAdded.add(resolved);
+        }
+    }
+
+    /**
+     * Writes one indirect object: records its xref entry, then emits the
+     * {@code <number> <generation> obj} header, the object's content and {@code endobj}.
+     * <p>
+     * Fuzzing: when the config's {@code getRandomizeObjectNumbers()} is true, the object
+     * number in the header is replaced by 1 in roughly 99% of calls. Only the header is
+     * altered; the xref entry is created from the real key.
+     *
+     * @param obj The object to write.
+     * @throws IOException if the output cannot be written
+     */
+    public void doWriteObject(COSBase obj) throws IOException {
+        writtenObjects.add(obj);
+        // find the physical reference
+        currentObjectKey = getObjectKey(obj);
+        // add a x ref entry
+        addXRefEntry(new COSWriterXRefEntry(getStandardOutput().getPos(), obj, currentObjectKey));
+
+        // write the object header
+        long objectNumber = currentObjectKey.getNumber();
+        if (config.getRandomizeObjectNumbers() && random.nextFloat() < 0.99) {
+            // deliberately collide object numbers
+            // TODO: consider random.nextInt(((int) objectNumber) * 2) instead of the constant
+            objectNumber = 1;
+        }
+        getStandardOutput().write(String.valueOf(objectNumber).getBytes(StandardCharsets.ISO_8859_1));
+        getStandardOutput().write(SPACE);
+        getStandardOutput().write(String.valueOf(currentObjectKey.getGeneration()).getBytes(StandardCharsets.ISO_8859_1));
+        getStandardOutput().write(SPACE);
+        getStandardOutput().write(OBJ);
+        getStandardOutput().writeEOL();
+
+        mutate(obj);
+        // null test added to please Sonar
+        // TODO: shouldn't all public methods be guarded against passing null. Passing null to most methods will
+        // fail with an NPE
+        if (obj != null) {
+            obj.accept(this);
+        }
+        getStandardOutput().writeEOL();
+        getStandardOutput().write(ENDOBJ);
+        getStandardOutput().writeEOL();
+    }
+
+    /**
+     * Hook for object-level fuzzing, called by {@code doWriteObject} just before the
+     * object is serialized. Currently a stub that modifies nothing.
+     *
+     * @param obj the object about to be written
+     */
+    private void mutate(COSBase obj) {
+        if (!(obj instanceof COSStream)) {
+            return;
+        }
+        //stub
+        //TODO: manipulate filters and stream length of the COSStream
+    }
+
+    /**
+     * Writes the file header: a {@code %PDF-<version>} line ({@code %FDF-<version>} when an
+     * FDF document is being written) followed by a comment line carrying {@code GARBAGE}.
+     *
+     * @param doc The document to get the data from.
+     * @throws IOException If there is an error writing to the stream.
+     */
+    protected void doWriteHeader(COSDocument doc) throws IOException {
+        String magic = (fdfDocument == null) ? "%PDF-" : "%FDF-";
+        byte[] headerBytes = (magic + Float.toString(doc.getVersion()))
+                .getBytes(StandardCharsets.ISO_8859_1);
+        getStandardOutput().write(headerBytes);
+        getStandardOutput().writeEOL();
+
+        // second header line: comment marker plus the GARBAGE bytes
+        getStandardOutput().write(COMMENT);
+        getStandardOutput().write(GARBAGE);
+        getStandardOutput().writeEOL();
+    }
+
+
+    /**
+     * Writes the {@code trailer} keyword and the trailer dictionary. Before writing,
+     * /Size is set to the highest written object number plus one, and entries that do
+     * not apply to this write (/Prev, /XRefStm, the document checksum) are removed.
+     *
+     * @param doc The document to create the trailer for.
+     * @throws IOException If there is an IOError while writing the document.
+     */
+    protected void doWriteTrailer(COSDocument doc) throws IOException {
+        getStandardOutput().write(TRAILER);
+        getStandardOutput().writeEOL();
+
+        COSDictionary trailerDict = doc.getTrailer();
+
+        //sort xref, needed only if object keys not regenerated
+        List<COSWriterXRefEntry> entries = getXRefEntries();
+        Collections.sort(entries);
+        COSWriterXRefEntry highest = entries.get(entries.size() - 1);
+        trailerDict.setLong(COSName.SIZE, highest.getKey().getNumber() + 1);
+
+        // /Prev is kept only when an incremental update will be performed
+        if (!incrementalUpdate) {
+            trailerDict.removeItem(COSName.PREV);
+        }
+        if (!doc.isXRefStream()) {
+            trailerDict.removeItem(COSName.XREF_STM);
+        }
+        // Remove a checksum if present
+        trailerDict.removeItem(COSName.DOC_CHECKSUM);
+
+        COSArray ids = trailerDict.getCOSArray(COSName.ID);
+        if (ids != null) {
+            ids.setDirect(true);
+        }
+
+        trailerDict.accept(this);
+    }
+
+    /**
+     * Writes the cross-reference data for a document that uses xref streams and/or is
+     * written as an incremental update. Depending on the conditions below this emits an
+     * xref stream object, a classic xref table followed by the trailer, or both.
+     *
+     * @param doc        the document being written
+     * @param hybridPrev the value the caller read from the trailer's /XRefStm entry;
+     *                   -1 is treated as "no hybrid xref stream"
+     * @throws IOException if writing fails
+     */
+    private void doWriteXRefInc(COSDocument doc, long hybridPrev) throws IOException {
+        if (doc.isXRefStream() || hybridPrev != -1) {
+            // the file uses XrefStreams, so we need to update
+            // it with an xref stream. We create a new one and fill it
+            // with data available here
+
+            // create a new XRefStream object
+            PDFXRefStream pdfxRefStream = new PDFXRefStream(doc);
+
+            // add all entries from the incremental update.
+            List<COSWriterXRefEntry> xRefEntries2 = getXRefEntries();
+            for (COSWriterXRefEntry cosWriterXRefEntry : xRefEntries2) {
+                pdfxRefStream.addEntry(cosWriterXRefEntry);
+            }
+
+            COSDictionary trailer = doc.getTrailer();
+            if (incrementalUpdate) {
+                // use previous startXref value as new PREV value
+                trailer.setLong(COSName.PREV, doc.getStartXref());
+            } else {
+                trailer.removeItem(COSName.PREV);
+            }
+            pdfxRefStream.addTrailerInfo(trailer);
+            // the size is the highest object number+1. we add one more
+            // for the xref stream object we are going to write
+            pdfxRefStream.setSize(getNumber() + 2);
+
+            // the xref stream object starts at the current output position
+            setStartxref(getStandardOutput().getPos());
+            COSStream stream2 = pdfxRefStream.getStream();
+            doWriteObject(stream2);
+        }
+
+        if (!doc.isXRefStream() || hybridPrev != -1) {
+            // classic table; in the hybrid case this runs in addition to the block above
+            COSDictionary trailer = doc.getTrailer();
+            trailer.setLong(COSName.PREV, doc.getStartXref());
+            if (hybridPrev != -1) {
+                // point /XRefStm at the start position recorded so far
+                COSName xrefStm = COSName.XREF_STM;
+                trailer.removeItem(xrefStm);
+                trailer.setLong(xrefStm, getStartxref());
+            }
+            doWriteXRefTable();
+            doWriteTrailer(doc);
+        }
+    }
+
+    /**
+     * Writes the classic "xref" table: the null entry is added, the entries are sorted,
+     * the table position is recorded as startxref, and every range header is followed by
+     * its entries.
+     *
+     * @throws IOException if writing fails
+     */
+    private void doWriteXRefTable() throws IOException {
+        addXRefEntry(COSWriterXRefEntry.getNullEntry());
+
+        // sort xref, needed only if object keys not regenerated
+        Collections.sort(getXRefEntries());
+
+        // remember the position where x ref was written
+        setStartxref(getStandardOutput().getPos());
+
+        getStandardOutput().write(XREF);
+        getStandardOutput().writeEOL();
+
+        // pairs of (first object number, entry count) for this x ref section
+        // we assume starting from scratch
+        Long[] ranges = getXRefRanges(getXRefEntries());
+        if (ranges.length % 2 != 0) {
+            // an odd number of values cannot be split into pairs; write no ranges
+            return;
+        }
+
+        int nextEntry = 0;
+        for (int r = 0; r < ranges.length; r += 2) {
+            long firstNumber = ranges[r];
+            long entryCount = ranges[r + 1];
+            writeXrefRange(firstNumber, entryCount);
+
+            for (long k = 0; k < entryCount; k++) {
+                writeXrefEntry(xRefEntries.get(nextEntry++));
+            }
+        }
+    }
+
+    /**
+     * Writes an incremental update for the non-signature case (e.g. when augmenting
+     * signatures): the existing document is copied to the incremental output and the
+     * buffered update is appended to it.
+     *
+     * @throws IOException if copying or writing fails
+     */
+    private void doWriteIncrement() throws IOException {
+        // write existing PDF
+        RandomAccessInputStream existingPdf = new RandomAccessInputStream(incrementalInput);
+        IOUtils.copy(existingPdf, incrementalOutput);
+
+        // write the actual incremental update, which was buffered in 'output'
+        ByteArrayOutputStream buffered = (ByteArrayOutputStream) output;
+        incrementalOutput.write(buffered.toByteArray());
+    }
+
+    /**
+     * Finishes a signed incremental update: computes the /ByteRange values, patches them
+     * into the buffered incremental part and, when a {@link SignatureInterface} is set,
+     * signs the data and writes the signature.
+     *
+     * @throws IOException if the reserved /ByteRange space is too small or writing fails
+     */
+    private void doWriteSignature() throws IOException {
+        // calculate the ByteRange values
+        long inLength = incrementalInput.length();
+        long beforeLength = signatureOffset;
+        long afterOffset = signatureOffset + signatureLength;
+        // algebraically this is getPos() - afterOffset: the bytes following the signature
+        long afterLength = getStandardOutput().getPos() - (inLength + signatureLength) - (signatureOffset - inLength);
+
+        // the text carries the closing bracket but no opening one
+        String byteRange = "0 " + beforeLength + " " + afterOffset + " " + afterLength + "]";
+
+        // Assign the values to the actual COSArray, so that the user can access it before closing
+        byteRangeArray.set(0, COSInteger.ZERO);
+        byteRangeArray.set(1, COSInteger.get(beforeLength));
+        byteRangeArray.set(2, COSInteger.get(afterOffset));
+        byteRangeArray.set(3, COSInteger.get(afterLength));
+
+        if (byteRange.length() > byteRangeLength) {
+            throw new IOException("Can't write new byteRange '" + byteRange +
+                    "' not enough space: byteRange.length(): " + byteRange.length() +
+                    ", byteRangeLength: " + byteRangeLength);
+        }
+
+        // copy the new incremental data into a buffer (e.g. signature dict, trailer)
+        ByteArrayOutputStream byteOut = (ByteArrayOutputStream) output;
+        byteOut.flush();
+        incrementPart = byteOut.toByteArray();
+
+        // overwrite the ByteRange in the buffer; positions are relative to the start of
+        // the incremental part, hence the "- inLength". Unused space is padded with blanks.
+        byte[] byteRangeBytes = byteRange.getBytes(StandardCharsets.ISO_8859_1);
+        for (int i = 0; i < byteRangeLength; i++) {
+            if (i >= byteRangeBytes.length) {
+                incrementPart[(int) (byteRangeOffset + i - inLength)] = 0x20; // SPACE
+            } else {
+                incrementPart[(int) (byteRangeOffset + i - inLength)] = byteRangeBytes[i];
+            }
+        }
+
+        if (signatureInterface != null) {
+            // data to be signed
+            final InputStream dataToSign = getDataToSign();
+
+            // sign the bytes
+            byte[] signatureBytes = signatureInterface.sign(dataToSign);
+            writeExternalSignature(signatureBytes);
+        }
+        // else the signature should be created externally and set via writeExternalSignature()
+    }
+
+    /**
+     * Returns the PDF bytes that have to be signed: the existing document followed by the
+     * incremental part with the signature /Contents span left out. Intended for clients that
+     * create the signature externally; {@link #write(PDDocument)} should have been called
+     * before, and the resulting signature is handed back via
+     * {@link #writeExternalSignature(byte[])}.
+     * <p>
+     * When a {@link SignatureInterface} instance is used, the writer obtains and writes the
+     * signature itself.
+     * </p>
+     *
+     * @return data stream to be signed
+     * @throws IllegalStateException if PDF is not prepared for external signing
+     * @throws IOException           if input data is closed
+     */
+    public InputStream getDataToSign() throws IOException {
+        boolean prepared = incrementPart != null && incrementalInput != null;
+        if (!prepared) {
+            throw new IllegalStateException("PDF not prepared for signing");
+        }
+
+        // range of incremental bytes to be signed (includes /ByteRange but not /Contents)
+        int sigStart = (int) (signatureOffset - incrementalInput.length());
+        int sigEnd = sigStart + (int) signatureLength;
+        int[] signedRanges = new int[] {
+            0, sigStart,
+            sigEnd, incrementPart.length - sigEnd
+        };
+
+        InputStream existingPdf = new RandomAccessInputStream(incrementalInput);
+        InputStream filteredIncrement = new COSFilterInputStream(incrementPart, signedRanges);
+        return new SequenceInputStream(existingPdf, filteredIncrement);
+    }
+
+    /**
+     * Write externally created signature of PDF data obtained via {@link #getDataToSign()} method.
+     *
+     * @param cmsSignature CMS signature byte array
+     * @throws IllegalStateException if PDF is not prepared for external signing
+     * @throws IOException           if source data stream is closed
+     */
+    public void writeExternalSignature(byte[] cmsSignature) throws IOException {
+
+        if (incrementPart == null || incrementalInput == null) {
+            throw new IllegalStateException("PDF not prepared for setting signature");
+        }
+        byte[] signatureBytes = Hex.getBytes(cmsSignature);
+
+        // subtract 2 bytes because of the enclosing "<>"
+        if (signatureBytes.length > signatureLength - 2) {
+            throw new IOException("Can't write signature, not enough space");
+        }
+
+        // overwrite the signature Contents in the buffer
+        int incPartSigOffset = (int) (signatureOffset - incrementalInput.length());
+        System.arraycopy(signatureBytes, 0, incrementPart, incPartSigOffset + 1, signatureBytes.length);
+
+        // write the data to the incremental output stream
+        IOUtils.copy(new RandomAccessInputStream(incrementalInput), incrementalOutput);
+        incrementalOutput.write(incrementPart);
+
+        // prevent further use
+        incrementPart = null;
+    }
+
+    /**
+     * Writes one xref subsection header: the two numbers separated by a space,
+     * followed by an end of line.
+     *
+     * @param x the first number of the header
+     * @param y the second number of the header
+     * @throws IOException if writing fails
+     */
+    private void writeXrefRange(long x, long y) throws IOException {
+        byte[] firstField = Long.toString(x).getBytes(StandardCharsets.ISO_8859_1);
+        byte[] secondField = Long.toString(y).getBytes(StandardCharsets.ISO_8859_1);
+
+        COSStandardOutputStream out = getStandardOutput();
+        out.write(firstField);
+        out.write(SPACE);
+        out.write(secondField);
+        out.writeEOL();
+    }
+
+    /**
+     * Writes one xref table line: the formatted offset, the formatted generation and the
+     * free/used marker, separated by spaces and terminated with CRLF.
+     *
+     * @param entry the entry to write
+     * @throws IOException if writing fails
+     */
+    private void writeXrefEntry(COSWriterXRefEntry entry) throws IOException {
+        byte[] offsetField = formatXrefOffset.format(entry.getOffset())
+                .getBytes(StandardCharsets.ISO_8859_1);
+        byte[] generationField = formatXrefGeneration.format(entry.getKey().getGeneration())
+                .getBytes(StandardCharsets.ISO_8859_1);
+
+        COSStandardOutputStream out = getStandardOutput();
+        out.write(offsetField);
+        out.write(SPACE);
+        out.write(generationField);
+        out.write(SPACE);
+        if (entry.isFree()) {
+            out.write(XREF_FREE);
+        } else {
+            out.write(XREF_USED);
+        }
+        out.writeCRLF();
+    }
+
+    /**
+     * Groups the object numbers of the xref entries into runs of consecutive numbers.
+     * The returned array has the layout used by the PDF specification (see section
+     * 7.5.4 of ISO32000-1:2008, example 1, page 40): pairs of first object number and
+     * number of entries.
+     * <p>
+     * example: the object numbers 0 1 2 5 6 7 8 10
+     * <p>
+     * produce the array
+     * <p>
+     * 0 3 5 4 10 1
+     * <p>
+     * i.e. a run of 3 entries starting at 0, a run of 4 entries starting at 5 and a
+     * run of 1 entry starting at 10.
+     * <p>
+     * Object numbers are handled as {@code long} throughout; they are not narrowed to
+     * {@code int}, so numbers above {@code Integer.MAX_VALUE} are not corrupted.
+     *
+     * @param xRefEntriesList list with the xRef entries that were written; the runs are
+     *                        detected in list order, so the caller is expected to pass a
+     *                        list sorted by object number
+     * @return an array of (first object number, count) pairs; empty for an empty list
+     */
+    protected Long[] getXRefRanges(List<COSWriterXRefEntry> xRefEntriesList) {
+        // -2 marks "no entry seen yet"
+        long last = -2;
+        long count = 1;
+
+        List<Long> ranges = new ArrayList<>();
+        for (COSWriterXRefEntry entry : xRefEntriesList) {
+            long nr = entry.getKey().getNumber();
+            if (nr == last + 1) {
+                // continues the current run
+                ++count;
+                last = nr;
+            } else if (last == -2) {
+                // very first entry opens the first run
+                last = nr;
+            } else {
+                // gap: close the current run and start a new one
+                ranges.add(last - count + 1);
+                ranges.add(count);
+                last = nr;
+                count = 1;
+            }
+        }
+        // the run that is still open at the end has to be written out as well
+        if (!xRefEntriesList.isEmpty()) {
+            ranges.add(last - count + 1);
+            ranges.add(count);
+        }
+        return ranges.toArray(new Long[0]);
+    }
+
+    /**
+     * Looks up the key registered for an object, or allocates the next object number
+     * (generation 0) and registers it when the object is not known yet.
+     *
+     * @param obj The object to get the key for.
+     * @return The object key for the object.
+     */
+    private COSObjectKey getObjectKey(COSBase obj) {
+        // resolve an indirect reference to the object it points to
+        COSBase resolved = (obj instanceof COSObject) ? ((COSObject) obj).getObject() : obj;
+
+        // PDFBOX-4540: because objectKeys is accessible from outside, it is possible
+        // that a COSObject obj is already in the objectKeys map.
+        COSObjectKey existing = objectKeys.get(obj);
+        if (existing == null && resolved != null) {
+            existing = objectKeys.get(resolved);
+        }
+        if (existing != null) {
+            return existing;
+        }
+
+        // unknown object: hand out the next number and register it under both identities
+        setNumber(getNumber() + 1);
+        COSObjectKey created = new COSObjectKey(getNumber(), 0);
+        objectKeys.put(obj, created);
+        if (resolved != null) {
+            objectKeys.put(resolved, created);
+        }
+        return created;
+    }
+
+    /**
+     * Writes an array. Elements are written inline or as an indirect reference (queueing
+     * the referenced object) depending on their type; elements are separated by a space,
+     * with a line break after every tenth element.
+     *
+     * @param obj the array to write
+     * @return always {@code null}
+     * @throws IOException if writing fails
+     */
+    @Override
+    public Object visitFromArray(COSArray obj) throws IOException {
+        getStandardOutput().write(ARRAY_OPEN);
+
+        int written = 0;
+        Iterator<COSBase> elements = obj.iterator();
+        while (elements.hasNext()) {
+            COSBase element = elements.next();
+
+            if (element instanceof COSDictionary) {
+                if (!element.isDirect()) {
+                    addObjectToWrite(element);
+                    writeReference(element);
+                } else {
+                    visitFromDictionary((COSDictionary) element);
+                }
+            } else if (element instanceof COSObject) {
+                COSBase target = ((COSObject) element).getObject();
+                // PDFBOX-4308: added willEncrypt to prevent an object
+                // that is referenced several times from being written
+                // direct and indirect, thus getting encrypted
+                // with wrong object number or getting encrypted twice
+                boolean byReference = willEncrypt || incrementalUpdate
+                        || target instanceof COSDictionary || target == null;
+                if (byReference) {
+                    addObjectToWrite(element);
+                    writeReference(element);
+                } else {
+                    target.accept(this);
+                }
+            } else if (element == null) {
+                COSNull.NULL.accept(this);
+            } else {
+                element.accept(this);
+            }
+
+            written++;
+            if (elements.hasNext()) {
+                boolean breakLine = written % 10 == 0;
+                if (breakLine) {
+                    getStandardOutput().writeEOL();
+                } else {
+                    getStandardOutput().write(SPACE);
+                }
+            }
+        }
+
+        getStandardOutput().write(ARRAY_CLOSE);
+        getStandardOutput().writeEOL();
+        return null;
+    }
+
+    @Override
+    public Object visitFromBoolean(COSBoolean obj) throws IOException {
+        obj.writePDF(getStandardOutput());
+        return null;
+    }
+
+    /**
+     * Writes a dictionary. Values are written inline or as an indirect reference (queueing
+     * the referenced object) depending on their type; entries with a null value are left out.
+     * While a signature dictionary (/Type /Sig or /DocTimeStamp) is being written, the output
+     * positions of its /Contents and /ByteRange values are recorded for later patching.
+     *
+     * @param obj the dictionary to write
+     * @return always {@code null}
+     * @throws IOException if writing fails
+     */
+    @Override
+    public Object visitFromDictionary(COSDictionary obj) throws IOException {
+        if (!reachedSignature) {
+            // start tracking signature positions once a signature dictionary is seen
+            COSBase itemType = obj.getItem(COSName.TYPE);
+            if (COSName.SIG.equals(itemType) || COSName.DOC_TIME_STAMP.equals(itemType)) {
+                reachedSignature = true;
+            }
+        }
+        getStandardOutput().write(DICT_OPEN);
+        getStandardOutput().writeEOL();
+        for (Map.Entry<COSName, COSBase> entry : obj.entrySet()) {
+            COSBase value = entry.getValue();
+            if (value != null) {
+                entry.getKey().accept(this);
+                getStandardOutput().write(SPACE);
+                if (value instanceof COSDictionary) {
+                    COSDictionary dict = (COSDictionary) value;
+
+                    if (!incrementalUpdate) {
+                        // write all XObjects as direct objects, this will save some size
+                        // PDFBOX-3684: but avoid dictionary that references itself
+                        COSBase item = dict.getItem(COSName.XOBJECT);
+                        if (item != null && !COSName.XOBJECT.equals(entry.getKey())) {
+                            item.setDirect(true);
+                        }
+                        item = dict.getItem(COSName.RESOURCES);
+                        if (item != null && !COSName.RESOURCES.equals(entry.getKey())) {
+                            item.setDirect(true);
+                        }
+                    }
+
+                    if (dict.isDirect()) {
+                        // If the object should be written direct, we need
+                        // to pass the dictionary to the visitor again.
+                        visitFromDictionary(dict);
+                    } else {
+                        addObjectToWrite(dict);
+                        writeReference(dict);
+                    }
+                } else if (value instanceof COSObject) {
+                    COSBase subValue = ((COSObject) value).getObject();
+                    if (willEncrypt || incrementalUpdate || subValue instanceof COSDictionary || subValue == null) {
+                        // PDFBOX-4308: added willEncrypt to prevent an object
+                        // that is referenced several times from being written
+                        // direct and indirect, thus getting encrypted
+                        // with wrong object number or getting encrypted twice
+                        addObjectToWrite(value);
+                        writeReference(value);
+                    } else {
+                        subValue.accept(this);
+                    }
+                } else {
+                    // If we reach the pdf signature, we need to determine the position of the
+                    // content and byterange
+                    if (reachedSignature && COSName.CONTENTS.equals(entry.getKey())) {
+                        signatureOffset = getStandardOutput().getPos();
+                        value.accept(this);
+                        signatureLength = getStandardOutput().getPos() - signatureOffset;
+                    } else if (reachedSignature && COSName.BYTERANGE.equals(entry.getKey())) {
+                        byteRangeArray = (COSArray) entry.getValue();
+                        // NOTE(review): the +1 presumably skips the opening '[' of the array - confirm
+                        byteRangeOffset = getStandardOutput().getPos() + 1;
+                        value.accept(this);
+                        byteRangeLength = getStandardOutput().getPos() - 1 - byteRangeOffset;
+                        // both positions are known now, stop tracking
+                        reachedSignature = false;
+                    } else {
+                        value.accept(this);
+                    }
+                }
+                getStandardOutput().writeEOL();
+
+            } else {
+                //then we won't write anything, there are a couple of cases
+                //where the value of an entry in the COSDictionary will
+                //be a dangling reference that points to nothing
+                //so we will just not write out the entry if that is the case
+            }
+        }
+        getStandardOutput().write(DICT_CLOSE);
+        getStandardOutput().writeEOL();
+        return null;
+    }
+
+    /**
+     * Writes a complete document: header (or a separating line break for an incremental
+     * update), body, cross-reference data with trailer, and the closing
+     * {@code startxref} / offset / EOF lines. For an incremental update the result is then
+     * handed to {@code doWriteIncrement} or {@code doWriteSignature}.
+     *
+     * @param doc the document to write
+     * @return always {@code null}
+     * @throws IOException if writing fails
+     */
+    @Override
+    public Object visitFromDocument(COSDocument doc) throws IOException {
+        if (!incrementalUpdate) {
+            doWriteHeader(doc);
+        } else {
+            // Sometimes the original file will be missing a newline at the end.
+            // In order to avoid having the first object on the same line
+            // as the %%EOF, we put a newline here. If there's already one at
+            // the end of the file, an extra one won't hurt. PDFBOX-1051
+            getStandardOutput().writeCRLF();
+        }
+
+        doWriteBody(doc);
+
+        // get the previous trailer
+        COSDictionary trailer = doc.getTrailer();
+        // -1 means "no /XRefStm value available"
+        long hybridPrev = -1;
+
+        if (trailer != null) {
+            hybridPrev = trailer.getLong(COSName.XREF_STM);
+        }
+
+        if (incrementalUpdate || doc.isXRefStream()) {
+            doWriteXRefInc(doc, hybridPrev);
+        } else {
+            doWriteXRefTable();
+            doWriteTrailer(doc);
+        }
+
+        // write the end of the file: startxref keyword, xref offset, EOF marker
+        getStandardOutput().write(STARTXREF);
+        getStandardOutput().writeEOL();
+        getStandardOutput().write(String.valueOf(getStartxref()).getBytes(StandardCharsets.ISO_8859_1));
+        getStandardOutput().writeEOL();
+        getStandardOutput().write(EOF);
+        getStandardOutput().writeEOL();
+
+        if (incrementalUpdate) {
+            // no recorded signature/byte-range position means a plain incremental update
+            if (signatureOffset == 0 || byteRangeOffset == 0) {
+                doWriteIncrement();
+            } else {
+                doWriteSignature();
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Lets the float write itself to the stream returned by {@code getStandardOutput()}.
+     *
+     * @param obj The float being visited.
+     * @return always {@code null}
+     * @throws IOException If an error occurs while writing.
+     */
+    @Override
+    public Object visitFromFloat(COSFloat obj) throws IOException {
+        obj.writePDF(getStandardOutput());
+        return null;
+    }
+
+    /**
+     * Lets the integer write itself to the stream returned by {@code getStandardOutput()}.
+     *
+     * @param obj The integer being visited.
+     * @return always {@code null}
+     * @throws IOException If an error occurs while writing.
+     */
+    @Override
+    public Object visitFromInt(COSInteger obj) throws IOException {
+        obj.writePDF(getStandardOutput());
+        return null;
+    }
+
+    /**
+     * Lets the name write itself to the stream returned by {@code getStandardOutput()}.
+     *
+     * @param obj The name being visited.
+     * @return always {@code null}
+     * @throws IOException If an error occurs while writing.
+     */
+    @Override
+    public Object visitFromName(COSName obj) throws IOException {
+        obj.writePDF(getStandardOutput());
+        return null;
+    }
+
+    /**
+     * Lets the null object write itself to the stream returned by {@code getStandardOutput()}.
+     *
+     * @param obj The null object being visited.
+     * @return always {@code null}
+     * @throws IOException If an error occurs while writing.
+     */
+    @Override
+    public Object visitFromNull(COSNull obj) throws IOException {
+        obj.writePDF(getStandardOutput());
+        return null;
+    }
+
+    /**
+     * Writes a reference to the given object: the number and the generation of
+     * the key returned by {@code getObjectKey(obj)}, each followed by a
+     * {@code SPACE}, and then the {@code REFERENCE} marker.
+     *
+     * @param obj The object whose reference is written.
+     * @throws IOException If there is an exception while writing the reference.
+     */
+    public void writeReference(COSBase obj) throws IOException {
+        COSObjectKey key = getObjectKey(obj);
+        // both numbers are written as ISO-8859-1 encoded decimal text
+        getStandardOutput().write(String.valueOf(key.getNumber()).getBytes(StandardCharsets.ISO_8859_1));
+        getStandardOutput().write(SPACE);
+        getStandardOutput().write(String.valueOf(key.getGeneration()).getBytes(StandardCharsets.ISO_8859_1));
+        getStandardOutput().write(SPACE);
+        getStandardOutput().write(REFERENCE);
+    }
+
+    /**
+     * Writes a stream object: its dictionary, the {@code STREAM} marker, the raw
+     * stream bytes and the {@code ENDSTREAM} marker. When {@code willEncrypt} is
+     * set, the stream is first handed to the security handler's
+     * {@code encryptStream} together with the current object key.
+     *
+     * @param obj The stream being visited.
+     * @return always {@code null}
+     * @throws IOException If an error occurs while writing the stream.
+     */
+    @Override
+    public Object visitFromStream(COSStream obj) throws IOException {
+        if (willEncrypt) {
+            pdDocument.getEncryption().getSecurityHandler()
+                    .encryptStream(obj, currentObjectKey.getNumber(), currentObjectKey.getGeneration());
+        }
+
+        // write the stream dictionary and the opening marker
+        visitFromDictionary(obj);
+        getStandardOutput().write(STREAM);
+        getStandardOutput().writeCRLF();
+
+        // try-with-resources guarantees the raw stream is closed once the
+        // content and the closing marker are written; if both the copy and
+        // close() fail, the copy's exception stays the primary one and the
+        // close() failure is attached as suppressed
+        try (InputStream input = obj.createRawInputStream()) {
+            IOUtils.copy(input, getStandardOutput());
+
+            getStandardOutput().writeCRLF();
+            getStandardOutput().write(ENDSTREAM);
+            getStandardOutput().writeEOL();
+        }
+        return null;
+    }
+
+    @Override
+    public Object visitFromString(COSString obj) throws IOException {
+        if (willEncrypt) {
+            pdDocument.getEncryption().getSecurityHandler().encryptString(
+                    obj,
+                    currentObjectKey.getNumber(),
+                    currentObjectKey.getGeneration());
+        }
+        COSWriter.writeString(obj, getStandardOutput());
+        return null;
+    }
+
+    /**
+     * Wraps the given COS document in a new {@code PDDocument} and writes it
+     * via {@link #write(PDDocument)}.
+     *
+     * @param doc The document to write.
+     * @throws IOException If an error occurs while generating the data.
+     */
+    public void write(COSDocument doc) throws IOException {
+        write(new PDDocument(doc));
+    }
+
+    /**
+     * This will write the pdf document without a signing interface; it is
+     * equivalent to calling {@link #write(PDDocument, SignatureInterface)} with
+     * {@code null} as the second argument. If the signature should be created
+     * externally, {@link #writeExternalSignature(byte[])} should be invoked to
+     * set the signature after calling this method.
+     *
+     * @param doc The document to write.
+     * @throws IOException If an error occurs while generating the data.
+     */
+    public void write(PDDocument doc) throws IOException {
+        write(doc, null);
+    }
+
+    /**
+     * This will write the pdf document. If signature should be created externally,
+     * {@link #writeExternalSignature(byte[])} should be invoked to set signature after calling this method.
+     * <p>
+     * Before the document is handed to the visitor, this method decides whether
+     * encryption applies ({@code willEncrypt}) and makes sure the trailer carries
+     * an ID array.
+     *
+     * @param doc           The document to write.
+     * @param signInterface class to be used for signing; {@code null} if external signing would be performed
+     *                      or there will be no signing at all
+     * @throws IOException           If an error occurs while generating the data.
+     * @throws IllegalStateException If the document has an encryption dictionary but no protection
+     *                               policy.
+     */
+    public void write(PDDocument doc, SignatureInterface signInterface) throws IOException {
+        // seed for the ID hash below: the document's own ID if it has one,
+        // otherwise the current time
+        Long idTime = doc.getDocumentId() == null ? System.currentTimeMillis() :
+                doc.getDocumentId();
+
+        pdDocument = doc;
+        signatureInterface = signInterface;
+
+        if (incrementalUpdate) {
+            prepareIncrement(doc);
+        }
+
+        // if the document says we should remove encryption, then we shouldn't encrypt
+        if (doc.isAllSecurityToBeRemoved()) {
+            willEncrypt = false;
+            // also need to get rid of the "Encrypt" in the trailer so readers
+            // don't try to decrypt a document which is not encrypted
+            COSDocument cosDoc = doc.getDocument();
+            COSDictionary trailer = cosDoc.getTrailer();
+            trailer.removeItem(COSName.ENCRYPT);
+        } else {
+            if (pdDocument.getEncryption() != null) {
+                // the protection policy check and the preparation step are
+                // skipped for incremental updates
+                if (!incrementalUpdate) {
+                    SecurityHandler securityHandler = pdDocument.getEncryption().getSecurityHandler();
+                    if (!securityHandler.hasProtectionPolicy()) {
+                        throw new IllegalStateException("PDF contains an encryption dictionary, please remove it with "
+                                + "setAllSecurityToBeRemoved() or set a protection policy with protect()");
+                    }
+                    securityHandler.prepareDocumentForEncryption(pdDocument);
+                }
+                willEncrypt = true;
+            } else {
+                willEncrypt = false;
+            }
+        }
+
+        COSDocument cosDoc = pdDocument.getDocument();
+        COSDictionary trailer = cosDoc.getTrailer();
+        COSArray idArray;
+        // an existing ID entry counts as present only when it is an array
+        // with exactly two elements
+        boolean missingID = true;
+        COSBase base = trailer.getDictionaryObject(COSName.ID);
+        if (base instanceof COSArray) {
+            idArray = (COSArray) base;
+            if (idArray.size() == 2) {
+                missingID = false;
+            }
+        } else {
+            idArray = new COSArray();
+        }
+        // a complete ID is left untouched unless this is an incremental update
+        if (missingID || incrementalUpdate) {
+            MessageDigest md5;
+            try {
+                md5 = MessageDigest.getInstance("MD5");
+            } catch (NoSuchAlgorithmException e) {
+                // should never happen
+                throw new RuntimeException(e);
+            }
+
+            // algorithm says to use time/path/size/values in doc to generate the id.
+            // we don't have path or size, so do the best we can
+            md5.update(Long.toString(idTime).getBytes(StandardCharsets.ISO_8859_1));
+
+            // mix in the string form of every value of the Info dictionary, if there is one
+            COSDictionary info = trailer.getCOSDictionary(COSName.INFO);
+            if (info != null) {
+                for (COSBase cosBase : info.getValues()) {
+                    md5.update(cosBase.toString().getBytes(StandardCharsets.ISO_8859_1));
+                }
+            }
+            // reuse origin documentID if available as first value
+            COSString firstID = missingID ? new COSString(md5.digest()) : (COSString) idArray.get(0);
+            // it's ok to use the same ID for the second part if the ID is created for the first time
+            COSString secondID = missingID ? firstID : new COSString(md5.digest());
+            idArray = new COSArray();
+            idArray.add(firstID);
+            idArray.add(secondID);
+            trailer.setItem(COSName.ID, idArray);
+        }
+        cosDoc.accept(this);
+    }
+
+    /**
+     * Writes the given FDF document. The document is remembered in
+     * {@code fdfDocument}, encryption is switched off, and the underlying COS
+     * document is then visited by this writer.
+     *
+     * @param doc The document to write.
+     * @throws IOException If an error occurs while generating the data.
+     */
+    public void write(FDFDocument doc) throws IOException {
+        willEncrypt = false;
+        fdfDocument = doc;
+        fdfDocument.getDocument().accept(this);
+    }
+
+    /**
+     * This will output the given byte getString as a PDF object.
+     *
+     * @param string COSString to be written
+     * @param output The stream to write to.
+     * @throws IOException If there is an error writing to the stream.
+     */
+    public static void writeString(COSString string, OutputStream output) throws IOException {
+        writeString(string.getBytes(), string.getForceHexForm(), output);
+    }
+
+    /**
+     * This will output the given text/byte string as a PDF object. The
+     * hexadecimal form is not forced.
+     *
+     * @param bytes  byte array representation of a string to be written
+     * @param output The stream to write to.
+     * @throws IOException If there is an error writing to the stream.
+     */
+    public static void writeString(byte[] bytes, OutputStream output) throws IOException {
+        writeString(bytes, false, output);
+    }
+
+    /**
+     * Writes a byte string either in literal form, wrapped in parentheses, or
+     * in hexadecimal form, wrapped in angle brackets.
+     * <p>
+     * The hexadecimal form is used when {@code forceHex} is set, or when any
+     * byte is negative (outside the ASCII range) or is a CR or LF. In the
+     * literal form, parentheses and backslashes are preceded by a backslash.
+     *
+     * @param bytes    byte array representation of the string to be written
+     * @param forceHex {@code true} to always use the hexadecimal form
+     * @param output   The stream to write to.
+     * @throws IOException If there is an error writing to the stream.
+     */
+    private static void writeString(byte[] bytes, boolean forceHex, OutputStream output)
+            throws IOException {
+        boolean useHex = forceHex;
+        if (!useHex) {
+            for (byte candidate : bytes) {
+                // a negative byte is an eight bit value outside the ASCII range;
+                // PDFBOX-3107: EOL markers within a string are troublesome
+                if (candidate < 0 || candidate == 0x0d || candidate == 0x0a) {
+                    useHex = true;
+                    break;
+                }
+            }
+        }
+
+        if (useHex) {
+            // hexadecimal form
+            output.write('<');
+            Hex.writeHexBytes(bytes, output);
+            output.write('>');
+            return;
+        }
+
+        // literal form: escape the delimiters and the escape character itself
+        output.write('(');
+        for (byte current : bytes) {
+            if (current == '(' || current == ')' || current == '\\') {
+                output.write('\\');
+            }
+            output.write(current);
+        }
+        output.write(')');
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformer.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformer.java
new file mode 100644
index 0000000..ab7fa11
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformer.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.pdf;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.fuzzing.Transformer;
+import org.apache.tika.fuzzing.exceptions.CantFuzzException;
+import org.apache.tika.mime.MediaType;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Collections;
+import java.util.Set;
+
+public class PDFTransformer implements Transformer {
+    private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.application("pdf"));
+    @Override
+    public Set<MediaType> getSupportedTypes() {
+        return SUPPORTED_TYPES;
+    }
+
+    private PDFTransformerConfig config = new PDFTransformerConfig();
+
+    @Override
+    public void transform(InputStream is, OutputStream os) throws IOException, TikaException {
+        try (PDDocument pdDocument = PDDocument.load(is)) {
+            try (EvilCOSWriter cosWriter = new EvilCOSWriter(os, config)) {
+                cosWriter.write(pdDocument);
+            }
+        } catch (InvalidPasswordException e) {
+            throw new CantFuzzException("encrypted doc");
+        }
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformerConfig.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformerConfig.java
new file mode 100644
index 0000000..d152878
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformerConfig.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.pdf;
+
+/**
+ * Settings holder handed to {@link EvilCOSWriter} by {@link PDFTransformer}.
+ */
+public class PDFTransformerConfig {
+
+    // switched on by default; this class offers no way to change it
+    private boolean randomizeObjectNumbers;
+
+    public PDFTransformerConfig() {
+        this.randomizeObjectNumbers = true;
+    }
+
+    /**
+     * @return the value of the {@code randomizeObjectNumbers} flag
+     */
+    public boolean getRandomizeObjectNumbers() {
+        return this.randomizeObjectNumbers;
+    }
+}
diff --git a/tika-fuzzing/src/main/resources/META-INF/services/org.apache.tika.fuzzing.Transformer b/tika-fuzzing/src/main/resources/META-INF/services/org.apache.tika.fuzzing.Transformer
new file mode 100644
index 0000000..07390de
--- /dev/null
+++ b/tika-fuzzing/src/main/resources/META-INF/services/org.apache.tika.fuzzing.Transformer
@@ -0,0 +1,17 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+org.apache.tika.fuzzing.general.GeneralTransformer
+#org.apache.tika.fuzzing.pdf.PDFTransformer
\ No newline at end of file
diff --git a/tika-fuzzing/src/main/resources/log4j.properties b/tika-fuzzing/src/main/resources/log4j.properties
new file mode 100644
index 0000000..7d3b372
--- /dev/null
+++ b/tika-fuzzing/src/main/resources/log4j.properties
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#info,debug, error,fatal ...
+log4j.rootLogger=info,stderr
+
+#console
+log4j.appender.stderr=org.apache.log4j.ConsoleAppender
+log4j.appender.stderr.layout=org.apache.log4j.PatternLayout
+log4j.appender.stderr.Target=System.err
+
+log4j.appender.stderr.layout.ConversionPattern= %-5p %m%n
diff --git a/tika-fuzzing/src/test/java/TestFuzzingCLI.java b/tika-fuzzing/src/test/java/TestFuzzingCLI.java
new file mode 100644
index 0000000..a98291b
--- /dev/null
+++ b/tika-fuzzing/src/test/java/TestFuzzingCLI.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import org.apache.commons.io.FileUtils;
+import org.apache.tika.fuzzing.cli.FuzzingCLI;
+import org.apache.tika.utils.ProcessUtils;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+/**
+ * Manually run drivers for {@link FuzzingCLI}. Both tests are ignored until
+ * they are converted into actual unit tests.
+ */
+public class TestFuzzingCLI {
+
+    /**
+     * Runs the CLI against directories that have to be filled in by hand.
+     */
+    @Test
+    @Ignore
+    public void testBasic() throws Exception {
+        //TODO: convert to actual unit test
+        String inputDir = "";// fill in
+        String outputDir = "";//fill in
+        String[] args = {
+                "-i", inputDir,
+                "-o", outputDir,
+                "-n", "8", // num threads
+                "-t", "1", //max transformers
+                "-p", "100", //per file iterations
+                "-r", "3"
+        };
+        FuzzingCLI.main(args);
+    }
+
+    /**
+     * Runs the CLI against the {@code /test-documents} resources and writes
+     * into a temporary directory that is deleted afterwards.
+     */
+    @Test
+    @Ignore
+    public void testMock() throws Exception {
+        //TODO: convert to actual unit test
+        Path inputDir = Paths.get(getClass().getResource("/test-documents").toURI());
+        Path outputDir = Files.createTempDirectory("tika-fuzzing-");
+        String escapedInput = ProcessUtils.escapeCommandLine(inputDir.toAbsolutePath().toString());
+        String escapedOutput = ProcessUtils.escapeCommandLine(outputDir.toAbsolutePath().toString());
+        String[] args = {
+                "-i", escapedInput,
+                "-o", escapedOutput,
+                "-n", "8", // num threads
+                "-t", "0", //max transformers
+                "-p", "10", //per file iterations
+                "-m", "10000", //max ms per file
+                "-r", "3"
+        };
+        try {
+            FuzzingCLI.main(args);
+        } finally {
+            // always remove the temporary output directory
+            FileUtils.deleteDirectory(outputDir.toFile());
+        }
+    }
+}
diff --git a/tika-fuzzing/src/test/java/TestTransformer.java b/tika-fuzzing/src/test/java/TestTransformer.java
new file mode 100644
index 0000000..1db2e1e
--- /dev/null
+++ b/tika-fuzzing/src/test/java/TestTransformer.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import org.apache.tika.fuzzing.general.GeneralTransformer;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Arrays;
+
+/**
+ * Manually run driver for {@link GeneralTransformer}; ignored until it is
+ * turned into an actual unit test.
+ */
+public class TestTransformer {
+
+    /**
+     * Transforms the same file 100 times and prints {@code SAME} whenever the
+     * output is byte-for-byte identical to the input.
+     */
+    @Test
+    @Ignore
+    public void testBasic() throws Exception {
+        //TODO: turn into actual unit test
+        Path path = Paths.get("");//put something meaningful here
+
+        GeneralTransformer transformer = new GeneralTransformer();
+        byte[] original = Files.readAllBytes(path);
+
+        for (int attempt = 0; attempt < 100; attempt++) {
+            ByteArrayInputStream source = new ByteArrayInputStream(original);
+            ByteArrayOutputStream fuzzed = new ByteArrayOutputStream();
+            transformer.transform(source, fuzzed);
+
+            boolean unchanged = Arrays.equals(fuzzed.toByteArray(), original);
+            if (unchanged) {
+                System.out.println("SAME");
+            }
+        }
+    }
+}
diff --git a/tika-fuzzing/src/test/resources/test-documents/heavy_hang.xml b/tika-fuzzing/src/test/resources/test-documents/heavy_hang.xml
new file mode 100644
index 0000000..f1f5b67
--- /dev/null
+++ b/tika-fuzzing/src/test/resources/test-documents/heavy_hang.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+<!-- NOTE(review): fixture under test-documents, the input directory of
+     TestFuzzingCLI.testMock. Presumably read by Tika's mock parser: adds an
+     author, writes a paragraph, then hangs. The 30000 ms hang is longer than
+     the 10000 ms per-file limit that test passes; confirm. -->
+<mock>
+    <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
+    <write element="p">some content</write>
+    <hang millis="30000" heavy="true" pulse_millis="100" />
+</mock>
\ No newline at end of file
diff --git a/tika-fuzzing/src/test/resources/test-documents/null_pointer.xml b/tika-fuzzing/src/test/resources/test-documents/null_pointer.xml
new file mode 100644
index 0000000..4561c3a
--- /dev/null
+++ b/tika-fuzzing/src/test/resources/test-documents/null_pointer.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+<!-- NOTE(review): fixture under test-documents, the input directory of
+     TestFuzzingCLI.testMock. Presumably read by Tika's mock parser: adds an
+     author, writes a paragraph, then throws a java.lang.NullPointerException
+     carrying the element text as its message; confirm. -->
+<mock>
+    <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
+    <write element="p">some content</write>
+    <throw class="java.lang.NullPointerException">another null pointer exception</throw>
+</mock>
\ No newline at end of file
diff --git a/tika-fuzzing/src/test/resources/test-documents/system_exit.xml b/tika-fuzzing/src/test/resources/test-documents/system_exit.xml
new file mode 100644
index 0000000..75d1d3b
--- /dev/null
+++ b/tika-fuzzing/src/test/resources/test-documents/system_exit.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+<!-- NOTE(review): fixture under test-documents, the input directory of
+     TestFuzzingCLI.testMock. Presumably read by Tika's mock parser: adds an
+     author, writes a paragraph, then makes the parsing process exit; confirm. -->
+<mock>
+    <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
+    <write element="p">some content</write>
+    <system_exit />
+</mock>
\ No newline at end of file