You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/04/03 21:41:43 UTC
[tika] 01/01: TIKA-3083 -- add fuzzing module

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-3083
in repository https://gitbox.apache.org/repos/asf/tika.git

commit bd56182c548b027883ceb69d29a91a6aae3c081a
Author: tallison <ta...@apache.org>
AuthorDate: Fri Apr 3 17:41:18 2020 -0400

    TIKA-3083 -- add fuzzing module
---
 pom.xml                                            |    1 +
 tika-fuzzing/pom.xml                               |   59 +
 .../apache/tika/fuzzing/AutoDetectTransformer.java |   96 ++
 .../java/org/apache/tika/fuzzing/Transformer.java  |   41 +
 .../java/org/apache/tika/fuzzing/cli/FuzzOne.java  |  266 ++++
 .../org/apache/tika/fuzzing/cli/FuzzingCLI.java    |  240 ++++
 .../apache/tika/fuzzing/cli/FuzzingCLIConfig.java  |  160 +++
 .../tika/fuzzing/exceptions/CantFuzzException.java |   25 +
 .../apache/tika/fuzzing/general/ByteDeleter.java   |   53 +
 .../apache/tika/fuzzing/general/ByteFlipper.java   |   67 +
 .../apache/tika/fuzzing/general/ByteInjector.java  |   76 ++
 .../tika/fuzzing/general/GeneralTransformer.java   |   95 ++
 .../apache/tika/fuzzing/general/SpanSwapper.java   |   84 ++
 .../org/apache/tika/fuzzing/general/Truncator.java |   60 +
 .../org/apache/tika/fuzzing/pdf/EvilCOSWriter.java | 1283 ++++++++++++++++++++
 .../apache/tika/fuzzing/pdf/PDFTransformer.java    |   52 +
 .../tika/fuzzing/pdf/PDFTransformerConfig.java     |   26 +
 .../services/org.apache.tika.fuzzing.Transformer   |   17 +
 tika-fuzzing/src/main/resources/log4j.properties   |   24 +
 tika-fuzzing/src/test/java/TestFuzzingCLI.java     |   67 +
 tika-fuzzing/src/test/java/TestTransformer.java    |   49 +
 .../test/resources/test-documents/heavy_hang.xml   |   25 +
 .../test/resources/test-documents/null_pointer.xml |   25 +
 .../test/resources/test-documents/system_exit.xml  |   25 +
 24 files changed, 2916 insertions(+)

diff --git a/pom.xml b/pom.xml
index 89ee2e2..486c789 100644
--- a/pom.xml
+++ b/pom.xml
@@ -44,6 +44,7 @@
     <module>tika-batch</module>
     <module>tika-app</module>
     <module>tika-server</module>
+    <module>tika-fuzzing</module>
     <module>tika-translate</module>
     <module>tika-langdetect</module>
     <module>tika-example</module>
diff --git a/tika-fuzzing/pom.xml b/tika-fuzzing/pom.xml
new file mode 100644
index 0000000..19c89ed
--- /dev/null
+++ b/tika-fuzzing/pom.xml
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <groupId>org.apache.tika</groupId>
+        <artifactId>tika-parent</artifactId>
+        <version>2.0.0-SNAPSHOT</version>
+        <relativePath>../tika-parent/pom.xml</relativePath>
+    </parent>
+
+    <artifactId>tika-fuzzing</artifactId>
+    <name>Apache Tika fuzzing</name>
+    <url>http://tika.apache.org/</url>
+
+    <modelVersion>4.0.0</modelVersion>
+
+
+    <dependencies>
+        <dependency>
+            <groupId>commons-cli</groupId>
+            <artifactId>commons-cli</artifactId>
+            <version>${cli.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.tika</groupId>
+            <artifactId>tika-parsers</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+        <!-- logging -->
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-log4j12</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>jcl-over-slf4j</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>jul-to-slf4j</artifactId>
+        </dependency>
+        <!-- test -->
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <!-- bring in the mock parser -->
+        <dependency>
+            <groupId>${project.groupId}</groupId>
+            <artifactId>tika-core</artifactId>
+            <version>${project.version}</version>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+</project>
\ No newline at end of file
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/AutoDetectTransformer.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/AutoDetectTransformer.java
new file mode 100644
index 0000000..f27f4a0
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/AutoDetectTransformer.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing;
+
+import org.apache.tika.config.ServiceLoader;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.fuzzing.general.GeneralTransformer;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MediaTypeRegistry;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+public class AutoDetectTransformer implements Transformer {
+
+    private static final ServiceLoader DEFAULT_LOADER =
+            new ServiceLoader(AutoDetectTransformer.class.getClassLoader());
+
+    TikaConfig config = TikaConfig.getDefaultConfig();
+    MediaTypeRegistry registry = config.getMediaTypeRegistry();
+    Detector detector = config.getDetector(); // same config as registry; avoids a second default-config load
+
+    Transformer fallback = new GeneralTransformer(); // returned when no type-specific transformer matches
+    Map<MediaType, Transformer> transformerMap = new HashMap<>();
+
+    public AutoDetectTransformer() {
+        this(DEFAULT_LOADER.loadServiceProviders(org.apache.tika.fuzzing.Transformer.class));
+    }
+
+    public AutoDetectTransformer(List<Transformer> transformers) {
+        for (Transformer t : transformers) {
+            for (MediaType mediaType : t.getSupportedTypes()) {
+                transformerMap.put(mediaType, t);
+            }
+        }
+    }
+
+    @Override
+    public Set<MediaType> getSupportedTypes() {
+        return java.util.Collections.unmodifiableSet(transformerMap.keySet()); // read-only view; callers cannot alter the map
+    }
+
+    @Override
+    public void transform(InputStream is, OutputStream os) throws IOException, TikaException {
+        try (TikaInputStream tis = TikaInputStream.get(is)) {
+            // Automatically detect the MIME type of the document
+            Metadata metadata = new Metadata();
+            MediaType type = detector.detect(tis, metadata);
+            Transformer transformer = getTransformer(type);
+            transformer.transform(tis, os);
+        }
+    }
+
+    private Transformer getTransformer(MediaType type) {
+        if (type == null) {
+            return fallback;
+        }
+        // We always work on the normalised, canonical form
+        type = registry.normalize(type);
+
+        while (type != null) {
+            // Try finding a transformer registered for the type
+            Transformer transformer = transformerMap.get(type);
+            if (transformer != null) {
+                return transformer;
+            }
+
+            // Failing that, try for the parent of the type
+            type = registry.getSupertype(type);
+        }
+        return fallback; // nothing registered for the type or any of its supertypes
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/Transformer.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/Transformer.java
new file mode 100644
index 0000000..7e3d083
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/Transformer.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Set;
+
+public interface Transformer {
+
+    /**
+     * Returns the set of media types supported by this
+     * transformer.
+     *
+     * @since Apache Tika 1.24.1
+     * @return immutable set of media types
+     */
+    Set<MediaType> getSupportedTypes();
+
+    /** Reads a document from {@code is} and writes a transformed version of it to {@code os}. */
+    void transform(InputStream is, OutputStream os) throws IOException, TikaException;
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzOne.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzOne.java
new file mode 100644
index 0000000..faa1383
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzOne.java
@@ -0,0 +1,266 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.cli;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.DefaultParser;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.fuzzing.AutoDetectTransformer;
+import org.apache.tika.fuzzing.Transformer;
+import org.apache.tika.fuzzing.exceptions.CantFuzzException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.utils.ExceptionUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
+/**
+ * Child process that runs against a single input file
+ */
+public class FuzzOne {
+    private static final Logger LOG = LoggerFactory.getLogger(FuzzOne.class);
+
+    static Options OPTIONS;
+    static {
+        //Every option is required: the parent process (FuzzingCLI) always
+        //passes the complete set of flags when it launches this class.
+        //The short names must stay in sync with the flags built in
+        //FuzzingCLI and the lookups in FuzzOneConfig.parse.
+
+        OPTIONS = new Options()
+                .addOption(Option.builder("i")
+                        .longOpt("inputFile")
+                        .desc("input seed file")
+                        .hasArg(true)
+                        .required(true)
+                        .build())
+                .addOption(Option.builder("o")
+                        .longOpt("outputFile")
+                        .desc("output file base")
+                        .hasArg(true)
+                        .required(true)
+                        .build())
+                .addOption(Option.builder("m")
+                        .longOpt("timeoutMs")
+                        .desc("timeout in ms -- max time allowed to parse a file")
+                        .hasArg(true)
+                        .required(true)
+                        .build())
+                .addOption(Option.builder("n")
+                        .desc("thread id (thread number)")
+                        .hasArg(true)
+                        .required(true)
+                        .build())
+                .addOption(Option.builder("p")
+                        .longOpt("perFile")
+                        .desc("number of iterations to run per seed file")
+                        .hasArg(true)
+                        .required(true)
+                        .build())
+                .addOption(Option.builder("t")
+                        .longOpt("maxTransformers")
+                        .desc("maximum number of transformers to run per iteration")
+                        .hasArg(true)
+                        .required(true)
+                        .build())
+                .addOption(Option.builder("r")
+                        .longOpt("retryId")
+                        .desc("which retry is this")
+                        .hasArg(true)
+                        .required(true)
+                        .build());
+    }
+    Parser parser = new AutoDetectParser();
+
+    public static void main(String[] args) throws Exception {
+        FuzzOneConfig config = FuzzOneConfig.parse(args);
+        FuzzOne fuzzOne = new FuzzOne();
+        fuzzOne.execute(config);
+    }
+
+    private void execute(FuzzOneConfig config) {
+        Path src = config.inputFile;
+        Path targetFileBase = config.outputFileBase; // a file-name base that fuzz() appends a suffix to, not a directory
+        AutoDetectTransformer transformer = new AutoDetectTransformer();
+        for (int i = 0; i < config.perFileIterations; i++) {
+            try {
+                String ext = "-"+config.threadNum + "-" + config.retryNum + "-"+i;
+                fuzz(ext, src, targetFileBase, transformer, config.timeoutMs);
+            } catch (IOException e) {
+                LOG.warn("problem transforming file", e);
+            } catch (CantFuzzException e) {
+                LOG.warn("can't fuzz this file "+src, e);
+                return; // stop iterating on this seed file
+            } catch (TikaException e) {
+                LOG.warn("problem transforming file "+src, e); // use the logger like the other handlers
+            }
+        }
+    }
+
+    private void fuzz(String ext, Path src, Path targetFileBase,
+                      Transformer transformer, long timeoutMs) throws IOException, TikaException {
+        // the fuzzed file is written next to targetFileBase, with ext appended to the file name
+        Path target = targetFileBase.getParent().resolve(
+                targetFileBase.getFileName().toString() +ext);
+
+        try {
+            transformFile(transformer, src, target);
+        } catch (Throwable t) {
+            LOG.warn("failed to transform: " + src.toString());
+            Files.deleteIfExists(target); // target may not exist yet; delete() would throw and mask t
+            throw t;
+        }
+        ExecutorService executor = Executors.newSingleThreadExecutor();
+        Future<Integer> future = executor.submit(new ParseTask(target));
+
+        try {
+            int result = future.get(timeoutMs, TimeUnit.MILLISECONDS);
+            if (result == 1 && Files.exists(target)) {
+                LOG.warn("failed to delete target: "+target);
+            }
+        } catch (TimeoutException e) {
+            LOG.warn("timeout exception:"+target);
+            future.cancel(true);
+            writeErrFile(target, ".timeout"); // leaves an empty marker file next to the target
+            System.exit(1);
+        } catch (InterruptedException|ExecutionException e) {
+            LOG.warn("problem parsing "+target, e);
+            System.exit(1);
+        } finally {
+            executor.shutdownNow();
+        }
+    }
+
+    private void writeErrFile(Path target, String ext) {
+        try {
+            Path err = target.getParent().resolve(target.getFileName().toString()+ext);
+            Files.write(err, new byte[0]);
+        } catch (IOException e) {
+            LOG.warn("things aren't going right today.", e);
+        }
+    }
+
+    private void handleThrowable(Path target, Throwable t) {
+
+        try {
+            Path errMsg = target.getParent().resolve(target.getFileName().toString()+".stacktrace");
+            Files.write(errMsg, ExceptionUtils.getStackTrace(t).getBytes(StandardCharsets.UTF_8));
+        } catch (IOException e) {
+            LOG.warn("things aren't going right today.", t);
+        }
+
+    }
+
+    private void transformFile(Transformer transformer, Path src, Path target) throws IOException, TikaException {
+        try (InputStream is = Files.newInputStream(src); OutputStream os =
+                Files.newOutputStream(target)) {
+            transformer.transform(is, os);
+        }
+    }
+
+    private static class FuzzOneConfig {
+        static FuzzOneConfig parse(String[] args) throws ParseException {
+            CommandLineParser parser = new DefaultParser();
+            CommandLine commandLine = parser.parse(OPTIONS, args);
+            FuzzOneConfig config = new FuzzOneConfig();
+            config.inputFile = Paths.get(commandLine.getOptionValue("i"));
+            config.outputFileBase = Paths.get(commandLine.getOptionValue("o"));
+            config.perFileIterations = Integer.parseInt(commandLine.getOptionValue("p"));
+            config.maxTransformers = Integer.parseInt(commandLine.getOptionValue("t"));
+            config.threadNum = Integer.parseInt(commandLine.getOptionValue("n"));
+            config.retryNum = Integer.parseInt(commandLine.getOptionValue("r"));
+            config.timeoutMs = Long.parseLong(commandLine.getOptionValue("m")); // field is a long; the parent sends Long.toString(...)
+            return config;
+        }
+
+        private Path inputFile;
+        private Path outputFileBase;
+        int perFileIterations;
+        int maxTransformers;
+        int threadNum;
+        int retryNum;
+        long timeoutMs;
+
+    }
+
+    private class ParseTask implements Callable<Integer> {
+        private final Path target;
+        public ParseTask(Path target) {
+            this.target = target;
+        }
+
+        /**
+         * Parses the target file; deletes it on success, otherwise keeps it and writes a stack trace file.
+         * @return 1 if success, 0 otherwise
+         * @throws Exception
+         */
+        @Override
+        public Integer call() throws Exception {
+            boolean success = false;
+            try (InputStream is = Files.newInputStream(target)) {
+                LOG.debug("parsing "+target);
+                parser.parse(is, new DefaultHandler(), new Metadata(), new ParseContext());
+                success = true;
+            } catch (TikaException e) {
+                if (e.getCause() != null && e.getCause() instanceof RuntimeException) {
+                    //handleThrowable(target, e.getCause());
+                    success = true;
+                } else {
+                    success = true;
+                }
+            } catch (SAXException|IOException e) {
+                success = true; // treated as success, same as TikaException above
+            } catch (Throwable t) {
+                handleThrowable(target, t); // success stays false: this is the case being hunted for
+            } finally {
+                if (success) {
+                    try {
+                        Files.delete(target);
+                    } catch (IOException e) {
+                        LOG.warn("couldn't delete: "+target.toAbsolutePath());
+                    }
+                } else {
+                    LOG.info("FOUND PROBLEM: "+target);
+                }
+            }
+            return success ? 1 : 0;
+        }
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLI.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLI.java
new file mode 100644
index 0000000..3857a9a
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLI.java
@@ -0,0 +1,240 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.cli;
+
+import org.apache.tika.utils.ProcessUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.file.FileVisitResult;
+import java.nio.file.FileVisitor;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorCompletionService;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+
+public class FuzzingCLI {
+    private static final Logger LOG = LoggerFactory.getLogger(FuzzingCLI.class);
+
+    private static final Path POISON = Paths.get("");
+
+    private int maxFiles = -1;
+
+    public static void main (String[] args) throws Exception {
+        FuzzingCLIConfig config = FuzzingCLIConfig.parse(args);
+        if (config.getMaxTransformers() == 0) {
+            LOG.warn("max transformers == 0!");
+        }
+        if (! Files.isDirectory(config.inputDir)) {
+            throw new IllegalArgumentException("input directory doesn't exist: " + config.inputDir);
+        }
+        FuzzingCLI fuzzingCLI = new FuzzingCLI();
+        Files.createDirectories(config.getOutputDirectory());
+        fuzzingCLI.execute(config);
+    }
+
+    private void execute(FuzzingCLIConfig config) {
+        ArrayBlockingQueue<Path> q = new ArrayBlockingQueue<>(10000);
+        ExecutorService executorService = Executors.newFixedThreadPool(config.getNumThreads()+1); // +1 for the FileAdder
+        ExecutorCompletionService<Integer> executorCompletionService = new ExecutorCompletionService<>(executorService);
+        FileAdder fileAdder = new FileAdder(config.getInputDirectory(), config.getNumThreads(), q);
+        executorCompletionService.submit(fileAdder);
+        for (int i = 0; i < config.getNumThreads(); i++) {
+            executorCompletionService.submit(new Fuzzer(q, config));
+        }
+        int finished = 0;
+        while (finished < config.getNumThreads()+1) { // wait for every Fuzzer plus the FileAdder
+            Future<Integer> future = null;
+            try {
+                future = executorCompletionService.poll(1, TimeUnit.SECONDS);
+                if (future != null) {
+                    future.get(); // rethrows a task failure as ExecutionException
+                    finished++;
+                }
+            } catch (InterruptedException | ExecutionException e) {
+                LOG.warn("problem waiting for fuzzing tasks to finish", e);
+                break;
+            }
+        }
+        executorService.shutdownNow();
+    }
+
+    private static class Fuzzer implements Callable<Integer> {
+        static AtomicInteger COUNTER = new AtomicInteger();
+        private final int threadId = COUNTER.getAndIncrement();
+        private final ArrayBlockingQueue<Path> q;
+        private final FuzzingCLIConfig config;
+        public Fuzzer(ArrayBlockingQueue<Path> q, FuzzingCLIConfig config) {
+            this.q = q;
+            this.config = config;
+        }
+
+        @Override
+        public Integer call() throws Exception {
+            while (true) {
+                Path p = q.take();
+                if (p.equals(POISON)) {
+                    LOG.debug("Thread "+threadId + " stopping");
+                    return 1;
+                }
+                boolean success = false;
+                int tries = 0;
+                while (! success && tries < config.getRetries()) {
+                    if (tries > 0) {
+                        LOG.warn("Retrying ("+tries+") "+p);
+                    }
+                    success = fuzzIt(config, p, tries);
+                    tries++;
+                }
+            }
+        }
+
+        private boolean fuzzIt(FuzzingCLIConfig config, Path p, int retryId) {
+            //the target files should be flattened so that
+            //problematic files are all in one directory...may rethink this option later
+            Path target = config.getOutputDirectory().resolve(p.getFileName());
+            String cp = System.getProperty("java.class.path");
+
+            String[] args = new String[] {
+                    "java",
+                    "-ea",
+                    "-cp",
+                    ProcessUtils.escapeCommandLine(cp),
+                    "org.apache.tika.fuzzing.cli.FuzzOne",
+                    "-i",
+                    ProcessUtils.escapeCommandLine(p.toAbsolutePath().toString()),
+                    "-o",
+                    ProcessUtils.escapeCommandLine(target.toAbsolutePath().toString()),
+                    "-p",
+                    Integer.toString(config.getPerFileIterations()),
+                    "-t",
+                    Integer.toString(config.getMaxTransformers()),
+                    "-n",
+                    Integer.toString(threadId),
+                    "-r",
+                    Integer.toString(retryId),
+                    "-m",
+                    Long.toString(config.getTimeoutMs())
+            };
+            ProcessBuilder pb = new ProcessBuilder(args).inheritIO();
+            Process process;
+            boolean success = false;
+            try {
+                process = pb.start();
+            } catch (IOException e) {
+                LOG.warn("problem starting process", e);
+                return false; // no child process exists; the code below would dereference null
+            }
+            try {
+                long totalTime = 2*config.getTimeoutMs()+config.getPerFileIterations(); // NOTE(review): adds an iteration count to ms -- confirm intended
+                success = process.waitFor(totalTime, TimeUnit.MILLISECONDS);
+            } catch (InterruptedException e) {
+                LOG.warn("problem waiting for process to finish", e);
+                Thread.currentThread().interrupt(); // keep the interrupt visible to the caller's loop
+            } finally {
+                if (process.isAlive()) {
+                    LOG.warn("process still alive for " + target.toAbsolutePath());
+                    process.destroyForcibly();
+                }
+                try {
+                    int exitValue = process.exitValue();
+                    if (exitValue != 0) {
+                        success = false;
+                        LOG.warn("bad exit value for " + target.toAbsolutePath());
+                    }
+                } catch (IllegalThreadStateException e) {
+                    success = false;
+                    LOG.warn("not exited");
+                    process.destroyForcibly();
+                }
+            }
+            return success;
+        }
+
+    }
+
+    private class FileAdder implements Callable<Integer> {
+        private final Path inputDir;
+        private final int numThreads;
+        private final ArrayBlockingQueue<Path> queue;
+        private int added = 0;
+        public FileAdder(Path inputDirectory, int numThreads, ArrayBlockingQueue<Path> queue) {
+            this.inputDir = inputDirectory;
+            this.numThreads = numThreads;
+            this.queue = queue;
+        }
+
+        @Override
+        public Integer call() throws Exception {
+            Files.walkFileTree(inputDir, new DirWalker());
+            for (int i = 0; i < numThreads; i++) {
+                queue.put(POISON); // one per Fuzzer; put() waits for space, add() throws when the queue is full
+            }
+            return 1;
+        }
+
+        private class DirWalker implements FileVisitor<Path> {
+
+            @Override
+            public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException {
+                return FileVisitResult.CONTINUE;
+            }
+
+            @Override
+            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
+                if (maxFiles > -1 && added >= maxFiles) {
+                    LOG.info("hit maxfiles; file crawler is stopping early");
+                    return FileVisitResult.TERMINATE;
+                }
+                try {
+                    boolean offered = queue.offer(file, 10, TimeUnit.MINUTES);
+                    if (offered) {
+                        added++;
+                        return FileVisitResult.CONTINUE;
+                    } else {
+                        LOG.error("couldn't add a file after 10 minutes!");
+                        return FileVisitResult.TERMINATE;
+                    }
+                } catch (InterruptedException e) {
+                    LOG.warn("interrupted while queueing " + file, e);
+                    Thread.currentThread().interrupt(); // restore the flag cleared by the exception
+                    return FileVisitResult.TERMINATE;
+                }
+            }
+
+            @Override
+            public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException {
+                return FileVisitResult.CONTINUE;
+            }
+
+            @Override
+            public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
+                return FileVisitResult.CONTINUE;
+            }
+        }
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLIConfig.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLIConfig.java
new file mode 100644
index 0000000..324b934
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLIConfig.java
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.cli;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.DefaultParser;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+/**
+ * Configuration for the fuzzing command line: parses the arguments described
+ * by {@link #OPTIONS} and exposes the resulting settings through getters.
+ */
+public class FuzzingCLIConfig {
+
+    private static final int DEFAULT_NUM_THREADS = 4;
+    private static final int DEFAULT_NUM_ITERATIONS = 1000;
+    //allow all transformers to operate
+    private static final int DEFAULT_MAX_TRANSFORMERS = -1;
+
+    private static final long DEFAULT_TIMEOUT_MS = 120000;
+
+    private static final int DEFAULT_RETRIES = 2;
+
+    //the supported options; inputDir (-i) and outputDir (-o) are required
+    static Options OPTIONS;
+    static {
+        OPTIONS = new Options()
+                .addOption(Option.builder("i")
+                        .longOpt("inputDir")
+                        .desc("input directory for seed files")
+                        .hasArg(true)
+                        .required(true)
+                        .build())
+                .addOption(Option.builder("o")
+                        .longOpt("outputDir")
+                        .desc("output directory for files that triggered problems")
+                        .hasArg(true)
+                        .required(true)
+                        .build())
+                .addOption(Option.builder("n")
+                        .longOpt("numThreads")
+                        .desc("number of threads")
+                        .hasArg(true)
+                        .required(false)
+                        .build())
+                .addOption(Option.builder("p")
+                        .longOpt("perFile")
+                        .desc("number of iterations to run per seed file")
+                        .hasArg(true)
+                        .required(false)
+                        .build())
+                .addOption(Option.builder("t")
+                        .longOpt("maxTransformers")
+                        .desc("maximum number of transformers to run per iteration")
+                        .hasArg(true)
+                        .required(false)
+                        .build())
+                .addOption(Option.builder("m")
+                        .longOpt("timeoutMs")
+                        .desc("timeout in ms -- max time allowed to parse a file")
+                        .hasArg(true)
+                        .required(false)
+                        .build())
+                .addOption(Option.builder("r")
+                        .longOpt("retries")
+                        .desc("number of times to retry a seed file if there's a catastrophic failure")
+                        .hasArg(true)
+                        .required(false)
+                        .build());
+
+    }
+
+    /**
+     * Builds a configuration from the command line arguments. Options that
+     * are not given fall back to the defaults declared in this class.
+     *
+     * @param args raw command line arguments
+     * @return the populated configuration
+     * @throws ParseException if the arguments do not match {@link #OPTIONS},
+     *          e.g. a required option is missing
+     * @throws NumberFormatException if a numeric option value is not a number
+     */
+    public static FuzzingCLIConfig parse(String[] args) throws ParseException {
+        CommandLineParser parser = new DefaultParser();
+        CommandLine commandLine = parser.parse(OPTIONS, args);
+        FuzzingCLIConfig config = new FuzzingCLIConfig();
+        config.inputDir = Paths.get(commandLine.getOptionValue("i"));
+        config.outputDir = Paths.get(commandLine.getOptionValue("o"));
+        config.numThreads = intValue(commandLine, "n", DEFAULT_NUM_THREADS);
+        config.perFileIterations = intValue(commandLine, "p", DEFAULT_NUM_ITERATIONS);
+        config.maxTransformers = intValue(commandLine, "t", DEFAULT_MAX_TRANSFORMERS);
+        //the timeout is a long; parse it as one so large values are not rejected
+        config.timeoutMS = longValue(commandLine, "m", DEFAULT_TIMEOUT_MS);
+        config.retries = intValue(commandLine, "r", DEFAULT_RETRIES);
+        return config;
+    }
+
+    //returns the int value of the option, or defaultValue if the option is absent
+    private static int intValue(CommandLine commandLine, String opt, int defaultValue) {
+        return commandLine.hasOption(opt) ?
+                Integer.parseInt(commandLine.getOptionValue(opt)) : defaultValue;
+    }
+
+    //returns the long value of the option, or defaultValue if the option is absent
+    private static long longValue(CommandLine commandLine, String opt, long defaultValue) {
+        return commandLine.hasOption(opt) ?
+                Long.parseLong(commandLine.getOptionValue(opt)) : defaultValue;
+    }
+
+
+    int numThreads;
+    //number of variants tried per file
+    int perFileIterations;
+    //maxTransformers per file
+    int maxTransformers;
+
+    //max time allowed to process each file in milliseconds
+    long timeoutMS;
+
+    //times to retry a seed file after a catastrophic failure
+    int retries;
+    Path inputDir;
+    Path outputDir;
+
+
+    public int getNumThreads() {
+        return numThreads;
+    }
+
+    public Path getInputDirectory() {
+        return inputDir;
+    }
+
+    public Path getOutputDirectory() {
+        return outputDir;
+    }
+
+    public int getMaxTransformers() {
+        return maxTransformers;
+    }
+
+    public long getTimeoutMs() {
+        return timeoutMS;
+    }
+
+    public int getPerFileIterations() {
+        return perFileIterations;
+    }
+
+    public int getRetries() {
+        return retries;
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/exceptions/CantFuzzException.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/exceptions/CantFuzzException.java
new file mode 100644
index 0000000..3540822
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/exceptions/CantFuzzException.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.exceptions;
+
+import org.apache.tika.exception.TikaException;
+
+/**
+ * A {@link TikaException} carrying a message that explains why fuzzing
+ * could not be performed.
+ */
+public class CantFuzzException extends TikaException {
+
+    /**
+     * @param msg description of the problem
+     */
+    public CantFuzzException(final String msg) {
+        super(msg);
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteDeleter.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteDeleter.java
new file mode 100644
index 0000000..ff26f7f
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteDeleter.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.general;
+
+import org.apache.tika.fuzzing.Transformer;
+import org.apache.tika.mime.MediaType;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Collections;
+import java.util.Random;
+import java.util.Set;
+
+/**
+ * Transformer that copies the input to the output while randomly
+ * dropping individual bytes.
+ */
+public class ByteDeleter implements Transformer {
+    Random random = new Random();
+    //probability that any given byte is dropped
+    float percentDeleted = 0.01f;
+
+    static Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.OCTET_STREAM);
+
+    @Override
+    public Set<MediaType> getSupportedTypes() {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Reads {@code is} byte by byte; each byte is dropped when the random
+     * draw falls below {@code percentDeleted} and written to {@code os} otherwise.
+     */
+    @Override
+    public void transform(InputStream is, OutputStream os) throws IOException {
+        for (int b = is.read(); b != -1; b = is.read()) {
+            boolean delete = random.nextFloat() < percentDeleted;
+            if (delete) {
+                //skip
+                continue;
+            }
+            os.write(b);
+        }
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteFlipper.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteFlipper.java
new file mode 100644
index 0000000..74e9b5f
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteFlipper.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.general;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.tika.fuzzing.Transformer;
+import org.apache.tika.mime.MediaType;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Collections;
+import java.util.Random;
+import java.util.Set;
+
+/**
+ * Transformer that replaces randomly chosen bytes of the input with
+ * random bytes. At least one position is always replaced for non-empty input.
+ */
+public class ByteFlipper implements Transformer {
+
+    //TODO add something about protecting first x bytes?
+    private Random random = new Random();
+    //probability that any given byte is replaced
+    private float percentCorrupt = 0.01f;
+
+    static Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.OCTET_STREAM);
+
+    @Override
+    public Set<MediaType> getSupportedTypes() {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Copies {@code is} to {@code os}, replacing each byte with a random byte
+     * with probability {@code percentCorrupt}. Nothing is written for empty input.
+     */
+    @Override
+    public void transform(InputStream is, OutputStream os) throws IOException {
+        //TODO -- don't load the full thing into memory
+        byte[] input = IOUtils.toByteArray(is);
+        if (input.length == 0) {
+            return;
+        }
+        byte[] singleByte = new byte[1];
+        //make sure that there's at least one change, even in short files
+        int atLeastOneIndex = random.nextInt(input.length);
+
+        for (int i = 0; i < input.length; i++) {
+            if (random.nextFloat() <= percentCorrupt || i == atLeastOneIndex) {
+                random.nextBytes(singleByte);
+                os.write(singleByte[0]);
+            } else {
+                os.write(input[i]);
+            }
+        }
+    }
+
+    /**
+     * @param percentCorrupt probability that any given byte is replaced
+     */
+    public void setPercentCorrupt(float percentCorrupt) {
+        //"this." is required; without it the parameter is assigned to itself
+        this.percentCorrupt = percentCorrupt;
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteInjector.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteInjector.java
new file mode 100644
index 0000000..2dbfec8
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteInjector.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.general;
+
+import org.apache.tika.fuzzing.Transformer;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.mime.MediaType;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Random;
+import java.util.Set;
+
+/**
+ * Transformer that copies the input to the output and inserts spans of
+ * random bytes after randomly chosen positions.
+ */
+public class ByteInjector implements Transformer {
+    Random random = new Random();
+    //number of injections per input byte
+    float injectionFrequency = 0.01f;
+    //exclusive upper bound on the length of a single injected span
+    int maxSpan = 100;
+    static Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.OCTET_STREAM);
+
+    @Override
+    public Set<MediaType> getSupportedTypes() {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Writes {@code is} to {@code os}, injecting a random span after each
+     * selected offset. The number of selected offsets is
+     * {@code floor(injectionFrequency * length)}, but never less than one.
+     */
+    @Override
+    public void transform(InputStream is, OutputStream os) throws IOException {
+        //TODO -- don't load the full thing into memory
+        byte[] input = IOUtils.toByteArray(is);
+        int numInjections = (int) Math.floor((double)injectionFrequency*(double)input.length);
+        //at least one injection
+        numInjections = numInjections == 0 ? 1 : numInjections;
+        int[] starts = new int[numInjections];
+        if (numInjections > 1) {
+            for (int i = 0; i < numInjections; i++) {
+                starts[i] = random.nextInt(input.length - 1);
+            }
+        } else {
+            starts[0] = 0;
+        }
+        Arrays.sort(starts);
+        int startIndex = 0;
+
+        for (int i = 0; i < input.length; i++) {
+            os.write(input[i]);
+            //the random offsets may contain duplicates; consume all of them here,
+            //otherwise startIndex stalls on the duplicate and every later
+            //injection is silently skipped
+            while (startIndex < starts.length && starts[startIndex] == i) {
+                inject(os);
+                startIndex++;
+            }
+        }
+    }
+
+    //writes between 0 and maxSpan-1 random bytes to os
+    private void inject(OutputStream os) throws IOException {
+        int len = random.nextInt(maxSpan);
+        byte[] randBytes = new byte[len];
+        random.nextBytes(randBytes);
+        os.write(randBytes);
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/GeneralTransformer.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/GeneralTransformer.java
new file mode 100644
index 0000000..803784e
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/GeneralTransformer.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.general;
+
+import org.apache.commons.compress.utils.IOUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.fuzzing.Transformer;
+import org.apache.tika.mime.MediaType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+
+/**
+ * Transformer that chains a random selection of other transformers:
+ * the output of each selected transformer is the input of the next.
+ */
+public class GeneralTransformer implements Transformer {
+
+    private static final Logger LOG = LoggerFactory.getLogger(GeneralTransformer.class);
+
+    Random random = new Random();
+
+    private final int maxTransforms;
+    private final Transformer[] transformers;
+    private final Set<MediaType> supportedTypes;
+
+    /**
+     * Uses one instance of each of the general byte-level transformers.
+     */
+    public GeneralTransformer() {
+        this(new ByteDeleter(), new ByteFlipper(),
+                new ByteInjector(), new Truncator(), new SpanSwapper());
+    }
+
+    /**
+     * @param transformers transformers to choose from; up to
+     *                     {@code transformers.length} are applied per call
+     */
+    public GeneralTransformer(Transformer ... transformers) {
+        this(transformers.length, transformers);
+    }
+
+    /**
+     * @param maxTransforms maximum number of transformers applied per call;
+     *                      a negative value means {@code transformers.length}
+     * @param transformers transformers to choose from
+     */
+    public GeneralTransformer(int maxTransforms, Transformer ... transformers) {
+        this.maxTransforms = (maxTransforms < 0) ? transformers.length : maxTransforms;
+        this.transformers = transformers;
+        Set<MediaType> tmpTypes = new HashSet<>();
+        for (Transformer transformer : transformers) {
+            tmpTypes.addAll(transformer.getSupportedTypes());
+        }
+        supportedTypes = Collections.unmodifiableSet(tmpTypes);
+    }
+
+    @Override
+    public Set<MediaType> getSupportedTypes() {
+        return supportedTypes;
+    }
+
+    /**
+     * Applies between 1 and {@code maxTransforms} transformers, each chosen at
+     * random (with replacement) from the configured transformers, and writes
+     * the final result to {@code os}. Nothing is written if
+     * {@code maxTransforms} is 0 or no transformers are configured.
+     */
+    @Override
+    public void transform(InputStream is, OutputStream os) throws IOException, TikaException {
+        //used for debugging
+        if (maxTransforms == 0 || transformers.length == 0) {
+            return;
+        }
+        int transformerCount = (maxTransforms == 1) ? 1 : 1 + random.nextInt(maxTransforms);
+        //TODO -- make this actually streaming
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        IOUtils.copy(is, bos);
+        for (int i = 0; i < transformerCount; i++) {
+            //choose among all of the transformers, not just the first transformerCount;
+            //this also keeps the index in bounds if maxTransforms > transformers.length
+            Transformer transformer = transformers[random.nextInt(transformers.length)];
+            byte[] bytes = bos.toByteArray();
+            bos = new ByteArrayOutputStream();
+            transformer.transform(new ByteArrayInputStream(bytes), bos);
+            bos.flush();
+            if (bos.size() == 0) {
+                LOG.warn("zero length: {}", transformer);
+            }
+        }
+        os.write(bos.toByteArray());
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/SpanSwapper.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/SpanSwapper.java
new file mode 100644
index 0000000..e2bc16c
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/SpanSwapper.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.general;
+
+import org.apache.tika.fuzzing.Transformer;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.mime.MediaType;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Collections;
+import java.util.Random;
+import java.util.Set;
+
+/**
+ * Transformer that exchanges randomly chosen, equally long spans of
+ * bytes within the input. The two spans of a swap may overlap.
+ */
+public class SpanSwapper implements Transformer {
+
+    Random random = new Random();
+    private float swapProbability = 0.01f;
+    int maxSpanLength = 10000;
+
+    static Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.OCTET_STREAM);
+
+    @Override
+    public Set<MediaType> getSupportedTypes() {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Performs {@code floor(swapProbability * length)} swaps, but never fewer
+     * than one, on a copy of the input and writes the result to {@code os}.
+     */
+    @Override
+    public void transform(InputStream is, OutputStream os) throws IOException {
+        byte[] input = IOUtils.toByteArray(is);
+        int swapCount = (int) Math.floor(swapProbability*input.length);
+        if (swapCount == 0) {
+            //at least one swap
+            swapCount = 1;
+        }
+        byte[] swapped = input.clone();
+        int done = 0;
+        while (done < swapCount) {
+            swapped = swap(swapped);
+            done++;
+        }
+        os.write(swapped);
+    }
+
+    //exchanges two random spans of the same length in place and returns the array
+    private byte[] swap(byte[] ret) {
+        if (ret.length == 0) {
+            return new byte[0];
+        }
+        int from = random.nextInt(ret.length);
+        int to = random.nextInt(ret.length);
+        //these spans can overlap;
+
+        int spanLength = random.nextInt(maxSpanLength);
+        int laterStart = Math.max(from, to);
+        if (spanLength + laterStart >= ret.length) {
+            //clip the span so that neither side runs past the end of the array
+            spanLength = ret.length - laterStart;
+        }
+
+        //remember what is currently in the landing zone
+        byte[] displaced = new byte[spanLength];
+        System.arraycopy(ret, to, displaced, 0, spanLength);
+        //move the source span onto the landing zone
+        System.arraycopy(ret, from, ret, to, spanLength);
+        //put the remembered bytes where the source span was
+        System.arraycopy(displaced, 0, ret, from, spanLength);
+        return ret;
+    }
+
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/Truncator.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/Truncator.java
new file mode 100644
index 0000000..209810c
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/Truncator.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.general;
+
+import org.apache.tika.fuzzing.Transformer;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.mime.MediaType;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Collections;
+import java.util.Random;
+import java.util.Set;
+
+/**
+ * Transformer that writes only a leading portion of the input, chosen
+ * at random and always shorter than the input itself.
+ */
+public class Truncator implements Transformer {
+
+    Random random = new Random();
+    static Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.OCTET_STREAM);
+
+    @Override
+    public Set<MediaType> getSupportedTypes() {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Writes a random-length prefix of {@code is} to {@code os}.
+     * Nothing is written for empty input.
+     */
+    @Override
+    public void transform(InputStream is, OutputStream os) throws IOException {
+        //TODO -- redo streaming
+        byte[] input = IOUtils.toByteArray(is);
+        int total = input.length;
+        if (total == 0) {
+            return;
+        }
+        int keep = 1 + random.nextInt(total);
+        if (keep >= total) {
+            //the draw would have kept everything; drop the last two bytes
+            //instead, or everything if the input is shorter than that
+            keep = Math.max(total - 2, 0);
+        }
+
+        os.write(input, 0, keep);
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/EvilCOSWriter.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/EvilCOSWriter.java
new file mode 100644
index 0000000..0484c93
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/EvilCOSWriter.java
@@ -0,0 +1,1283 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.pdf;
+
+import org.apache.pdfbox.cos.COSArray;
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSBoolean;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSDocument;
+import org.apache.pdfbox.cos.COSFloat;
+import org.apache.pdfbox.cos.COSInteger;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.cos.COSNull;
+import org.apache.pdfbox.cos.COSNumber;
+import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.cos.COSObjectKey;
+import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.cos.COSString;
+import org.apache.pdfbox.cos.COSUpdateInfo;
+import org.apache.pdfbox.cos.ICOSVisitor;
+import org.apache.pdfbox.io.IOUtils;
+import org.apache.pdfbox.io.RandomAccessInputStream;
+import org.apache.pdfbox.io.RandomAccessRead;
+import org.apache.pdfbox.pdfparser.PDFXRefStream;
+import org.apache.pdfbox.pdfwriter.COSStandardOutputStream;
+import org.apache.pdfbox.pdfwriter.COSWriter;
+import org.apache.pdfbox.pdfwriter.COSWriterXRefEntry;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.encryption.SecurityHandler;
+import org.apache.pdfbox.pdmodel.fdf.FDFDocument;
+import org.apache.pdfbox.pdmodel.interactive.digitalsignature.COSFilterInputStream;
+import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface;
+import org.apache.pdfbox.util.Hex;
+
+import java.io.ByteArrayOutputStream;
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.SequenceInputStream;
+import java.nio.charset.StandardCharsets;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.text.DecimalFormat;
+import java.text.DecimalFormatSymbols;
+import java.text.NumberFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Deque;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+
+public class EvilCOSWriter implements ICOSVisitor, Closeable {
+    /**
+     * The dictionary open token.
+     */
+    public static final byte[] DICT_OPEN = "<<".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The dictionary close token.
+     */
+    public static final byte[] DICT_CLOSE = ">>".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * space character.
+     */
+    public static final byte[] SPACE = {' '};
+    /**
+     * The start to a PDF comment.
+     */
+    public static final byte[] COMMENT = {'%'};
+
+    /**
+     * The output version of the PDF.
+     */
+    public static final byte[] VERSION = "PDF-1.4".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * Garbage bytes used to create the PDF header.
+     */
+    public static final byte[] GARBAGE = new byte[]{(byte) 0xf6, (byte) 0xe4, (byte) 0xfc, (byte) 0xdf};
+    /**
+     * The EOF constant.
+     */
+    public static final byte[] EOF = "%%EOF".getBytes(StandardCharsets.US_ASCII);
+    // pdf tokens
+
+    /**
+     * The reference token.
+     */
+    public static final byte[] REFERENCE = "R".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The XREF token.
+     */
+    public static final byte[] XREF = "xref".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The xref free token.
+     */
+    public static final byte[] XREF_FREE = "f".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The xref used token.
+     */
+    public static final byte[] XREF_USED = "n".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The trailer token.
+     */
+    public static final byte[] TRAILER = "trailer".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The start xref token.
+     */
+    public static final byte[] STARTXREF = "startxref".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The starting object token.
+     */
+    public static final byte[] OBJ = "obj".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The end object token.
+     */
+    public static final byte[] ENDOBJ = "endobj".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The array open token.
+     */
+    public static final byte[] ARRAY_OPEN = "[".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The array close token.
+     */
+    public static final byte[] ARRAY_CLOSE = "]".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The open stream token.
+     */
+    public static final byte[] STREAM = "stream".getBytes(StandardCharsets.US_ASCII);
+    /**
+     * The close stream token.
+     */
+    public static final byte[] ENDSTREAM = "endstream".getBytes(StandardCharsets.US_ASCII);
+
+    private final NumberFormat formatXrefOffset = new DecimalFormat("0000000000",
+            DecimalFormatSymbols.getInstance(Locale.US)); // ten zero-padded digits for an xref entry offset
+
+    // the decimal format (five zero-padded digits) for the generation number of an xref entry
+    private final NumberFormat formatXrefGeneration = new DecimalFormat("00000",
+            DecimalFormatSymbols.getInstance(Locale.US));
+
+    // the raw stream that receives the pdf output
+    private OutputStream output;
+
+    // wrapper around the output stream; all COS data is written through it
+    private COSStandardOutputStream standardOutput;
+
+    // the position at which the xref section starts
+    private long startxref = 0;
+
+    // the most recently assigned object number
+    private long number = 0;
+
+    // maps each object to the key generated for it in the writer;
+    // these keys are used for indirect references in other objects.
+    // A Hashtable is used on purpose over a HashMap
+    // so that null entries will not get added.
+    @SuppressWarnings({"squid:S1149"})
+    private final Map<COSBase, COSObjectKey> objectKeys = new Hashtable<>();
+
+    private final Map<COSObjectKey, COSBase> keyObject = new HashMap<>(); // reverse lookup: key to object
+
+    // the xref entries collected so far
+    private final List<COSWriterXRefEntry> xRefEntries = new ArrayList<>();
+    private final Set<COSBase> objectsToWriteSet = new HashSet<>(); // membership index for objectsToWrite
+
+    // queue of objects still to be written, in insertion order
+    private final Deque<COSBase> objectsToWrite = new LinkedList<>();
+
+    // the objects that have already been written
+    private final Set<COSBase> writtenObjects = new HashSet<>();
+
+    // An 'actual' is any COSBase that is not a COSObject.
+    // We need to keep track of the actuals that are added
+    // as well as the objects because there is a problem
+    // when adding a COSObject and then later adding
+    // the actual for that object, so we track
+    // actuals separately.
+    private final Set<COSBase> actualsAdded = new HashSet<>();
+
+    private COSObjectKey currentObjectKey = null; // key of the object currently being written
+    private PDDocument pdDocument = null;
+    private FDFDocument fdfDocument = null; // when non-null the header is written as %FDF-
+    private boolean willEncrypt = false;
+
+    // state used for incremental updates and signing
+    private boolean incrementalUpdate = false;
+    private boolean reachedSignature = false;
+    private long signatureOffset;
+    private long signatureLength;
+    private long byteRangeOffset;
+    private long byteRangeLength;
+    private RandomAccessRead incrementalInput;
+    private OutputStream incrementalOutput;
+    private SignatureInterface signatureInterface;
+    private byte[] incrementPart;
+    private COSArray byteRangeArray;
+
+    private final PDFTransformerConfig config; // fuzzing options
+    private final Random random = new Random(); // source of randomness for fuzzing decisions
+    /**
+     * Creates a writer that emits PDF data to the given stream.
+     *
+     * @param outputStream destination for the PDF bytes; it is wrapped in a
+     *                     {@link COSStandardOutputStream}, which {@link #close()} closes
+     * @param config       the fuzzing options consulted while objects are written
+     */
+    public EvilCOSWriter(OutputStream outputStream, PDFTransformerConfig config) {
+        this.config = config;
+        setOutput(outputStream);
+        setStandardOutput(new COSStandardOutputStream(outputStream));
+    }
+
+    private void prepareIncrement(PDDocument doc)  throws IOException {
+        if (doc == null) {
+            return;
+        }
+        COSDocument cosDoc = doc.getDocument();
+        Set<COSObjectKey> knownKeys = cosDoc.getXrefTable().keySet();
+        long maxObjectNumber = doc.getDocument().getHighestXRefObjectNumber();
+
+        for (COSObjectKey key : knownKeys) {
+            COSBase resolved = cosDoc.getObjectFromPool(key).getObject();
+            if (key == null) {
+                continue;
+            }
+            // register every non-numeric object under its existing key, in both directions
+            if (resolved != null && !(resolved instanceof COSNumber)) {
+                objectKeys.put(resolved, key);
+                keyObject.put(key, resolved);
+            }
+            // new objects must be numbered above everything already present
+            maxObjectNumber = Math.max(maxObjectNumber, key.getNumber());
+        }
+        setNumber(maxObjectNumber);
+    }
+
+    /**
+     * add an entry in the x ref table for later dump.
+     *
+     * @param entry The new entry to add.
+     */
+    protected void addXRefEntry(COSWriterXRefEntry entry) {
+        getXRefEntries().add(entry);
+    }
+
+    /**
+     * Closes the standard output stream and, if present, the incremental output stream.
+     * The incremental output is closed even when closing the standard output fails, so
+     * that a failure on the first stream cannot leak the second one.
+     *
+     * @throws IOException If one of the underlying streams throws an exception on close.
+     */
+    @Override
+    public void close() throws IOException {
+        try {
+            if (getStandardOutput() != null) {
+                getStandardOutput().close();
+            }
+        } finally {
+            if (incrementalOutput != null) {
+                incrementalOutput.close();
+            }
+        }
+    }
+
+    /**
+     * Returns the object number that was assigned most recently.
+     *
+     * @return the current object number
+     */
+    protected long getNumber() {
+        return this.number;
+    }
+
+    /**
+     * Returns the live map from objects to the keys assigned to them by this writer.
+     *
+     * @return the object-to-key map (not a copy)
+     */
+    public Map<COSBase, COSObjectKey> getObjectKeys() {
+        return this.objectKeys;
+    }
+
+    /**
+     * Returns the raw output stream this writer was created with.
+     *
+     * @return the output stream
+     */
+    protected java.io.OutputStream getOutput() {
+        return this.output;
+    }
+
+    /**
+     * Returns the stream through which all COS data is written.
+     *
+     * @return the standard output stream
+     */
+    protected COSStandardOutputStream getStandardOutput() {
+        return this.standardOutput;
+    }
+
+    /**
+     * Returns the recorded start position of the xref section.
+     *
+     * @return the current start xref value
+     */
+    protected long getStartxref() {
+        return this.startxref;
+    }
+
+    /**
+     * Returns the live list of xref entries collected so far.
+     *
+     * @return the xref entries (not a copy)
+     */
+    protected List<COSWriterXRefEntry> getXRefEntries() {
+        return this.xRefEntries;
+    }
+
+    /**
+     * Overwrites the current object number.
+     *
+     * @param newNumber the object number to continue from
+     */
+    protected void setNumber(long newNumber) {
+        this.number = newNumber;
+    }
+
+    /**
+     * Stores the raw output stream.
+     *
+     * @param newOutput the stream that will receive the output
+     */
+    private void setOutput(OutputStream newOutput) {
+        this.output = newOutput;
+    }
+
+    /**
+     * Stores the stream through which COS data is written.
+     *
+     * @param newStandardOutput the standard output stream to use
+     */
+    private void setStandardOutput(COSStandardOutputStream newStandardOutput) {
+        this.standardOutput = newStandardOutput;
+    }
+
+    /**
+     * Records the start position of the xref section.
+     *
+     * @param newStartxref the new start xref value
+     */
+    protected void setStartxref(long newStartxref) {
+        this.startxref = newStartxref;
+    }
+
+    /**
+     * This will write the body of the document.
+     *
+     * @param doc The document to write the body for.
+     * @throws IOException If there is an error writing the data.
+     */
+    protected void doWriteBody(COSDocument doc) throws IOException {
+        COSDictionary trailer = doc.getTrailer();
+        COSDictionary root = trailer.getCOSDictionary(COSName.ROOT);
+        COSDictionary info = trailer.getCOSDictionary(COSName.INFO);
+        COSDictionary encrypt = trailer.getCOSDictionary(COSName.ENCRYPT);
+        if (root != null) {
+            addObjectToWrite(root);
+        }
+        if (info != null) {
+            addObjectToWrite(info);
+        }
+
+        doWriteObjects();
+        willEncrypt = false;
+        if (encrypt != null) {
+            addObjectToWrite(encrypt);
+        }
+
+        doWriteObjects();
+    }
+
+    private void doWriteObjects() throws IOException {
+        // writing an object may enqueue further objects, so drain until the queue is empty
+        while (!objectsToWrite.isEmpty()) {
+            COSBase pending = objectsToWrite.removeFirst();
+            objectsToWriteSet.remove(pending);
+            doWriteObject(pending);
+        }
+    }
+
+    private void addObjectToWrite(COSBase object) {
+        COSBase actual = object;
+        if (actual instanceof COSObject) {
+            actual = ((COSObject) actual).getObject();
+        }
+
+        if (!writtenObjects.contains(object) &&
+                !objectsToWriteSet.contains(object) &&
+                !actualsAdded.contains(actual)) {
+            COSBase cosBase = null;
+            COSObjectKey cosObjectKey = null;
+            if (actual != null) {
+                cosObjectKey = objectKeys.get(actual);
+            }
+            if (cosObjectKey != null) {
+                cosBase = keyObject.get(cosObjectKey);
+            }
+            if (actual != null && objectKeys.containsKey(actual)
+                    && object instanceof COSUpdateInfo && !((COSUpdateInfo) object).isNeedToBeUpdated()
+                    && cosBase instanceof COSUpdateInfo && !((COSUpdateInfo) cosBase).isNeedToBeUpdated()) {
+                return;
+            }
+            objectsToWrite.add(object);
+            objectsToWriteSet.add(object);
+            if (actual != null) {
+                actualsAdded.add(actual);
+            }
+        }
+    }
+
+    /**
+     * Writes one indirect object: the {@code <number> <generation> obj} header, the
+     * object's content and the closing {@code endobj}.
+     * <p>
+     * The xref entry is always recorded with the object's real key. When
+     * {@code config.getRandomizeObjectNumbers()} is set, the object number written in
+     * the header is replaced by {@code 1} with a probability of roughly 99%, so the
+     * header deliberately disagrees with the xref table.
+     * </p>
+     *
+     * @param obj The object to write.
+     * @throws IOException if the output cannot be written
+     */
+    public void doWriteObject(COSBase obj) throws IOException {
+        writtenObjects.add(obj);
+        // find the physical reference
+        currentObjectKey = getObjectKey(obj);
+        // add a x ref entry
+        addXRefEntry(new COSWriterXRefEntry(getStandardOutput().getPos(), obj, currentObjectKey));
+
+        // choose the object number that goes into the header (possibly a bogus one)
+        long objectNumber = currentObjectKey.getNumber();
+        if (config.getRandomizeObjectNumbers() && random.nextFloat() < 0.99) {
+            objectNumber = 1;
+        }
+
+        // write the object
+        getStandardOutput().write(String.valueOf(objectNumber).getBytes(StandardCharsets.ISO_8859_1));
+        getStandardOutput().write(SPACE);
+        getStandardOutput().write(String.valueOf(currentObjectKey.getGeneration()).getBytes(StandardCharsets.ISO_8859_1));
+        getStandardOutput().write(SPACE);
+        getStandardOutput().write(OBJ);
+        getStandardOutput().writeEOL();
+        mutate(obj);
+        // null test added to please Sonar
+        // TODO: shouldn't all public methods be guarded against passing null. Passing null to most methods will
+        // fail with an NPE
+        if (obj != null) {
+            obj.accept(this);
+        }
+        getStandardOutput().writeEOL();
+        getStandardOutput().write(ENDOBJ);
+        getStandardOutput().writeEOL();
+    }
+
+    private void mutate(COSBase obj) {
+        //stub: currently has no effect; doWriteObject calls it just before the object is serialized
+        if (obj instanceof COSStream) {
+            COSStream stream = (COSStream)obj;
+            //TODO: manipulate filters and stream length (stream is unused until this is implemented)
+        }
+    }
+
+    /**
+     * This will write the header to the PDF document.
+     *
+     * @param doc The document to get the data from.
+     * @throws IOException If there is an error writing to the stream.
+     */
+    protected void doWriteHeader(COSDocument doc) throws IOException {
+        String headerString;
+        if (fdfDocument != null) {
+            headerString = "%FDF-" + Float.toString(doc.getVersion());
+        } else {
+            headerString = "%PDF-" + Float.toString(doc.getVersion());
+        }
+        getStandardOutput().write(headerString.getBytes(StandardCharsets.ISO_8859_1));
+
+        getStandardOutput().writeEOL();
+        getStandardOutput().write(COMMENT);
+        getStandardOutput().write(GARBAGE);
+        getStandardOutput().writeEOL();
+    }
+
+
+    /**
+     * This will write the trailer to the PDF document.
+     *
+     * @param doc The document to create the trailer for.
+     * @throws IOException If there is an IOError while writing the document.
+     */
+    protected void doWriteTrailer(COSDocument doc) throws IOException {
+        getStandardOutput().write(TRAILER);
+        getStandardOutput().writeEOL();
+
+        COSDictionary trailer = doc.getTrailer();
+        //sort xref, needed only if object keys not regenerated
+        Collections.sort(getXRefEntries());
+        COSWriterXRefEntry lastEntry = getXRefEntries().get(getXRefEntries().size() - 1);
+        trailer.setLong(COSName.SIZE, lastEntry.getKey().getNumber() + 1);
+        // Only need to stay, if an incremental update will be performed
+        if (!incrementalUpdate) {
+            trailer.removeItem(COSName.PREV);
+        }
+        if (!doc.isXRefStream()) {
+            trailer.removeItem(COSName.XREF_STM);
+        }
+        // Remove a checksum if present
+        trailer.removeItem(COSName.DOC_CHECKSUM);
+
+        COSArray idArray = trailer.getCOSArray(COSName.ID);
+        if (idArray != null) {
+            idArray.setDirect(true);
+        }
+
+        trailer.accept(this);
+    }
+
+    private void doWriteXRefInc(COSDocument doc, long hybridPrev) throws IOException {
+        if (doc.isXRefStream() || hybridPrev != -1) {
+            // the file uses XrefStreams, so we need to update
+            // it with an xref stream. We create a new one and fill it
+            // with data available here
+
+            // create a new XRefStrema object
+            PDFXRefStream pdfxRefStream = new PDFXRefStream(doc);
+
+            // add all entries from the incremental update.
+            List<COSWriterXRefEntry> xRefEntries2 = getXRefEntries();
+            for (COSWriterXRefEntry cosWriterXRefEntry : xRefEntries2) {
+                pdfxRefStream.addEntry(cosWriterXRefEntry);
+            }
+
+            COSDictionary trailer = doc.getTrailer();
+            if (incrementalUpdate) {
+                // use previous startXref value as new PREV value
+                trailer.setLong(COSName.PREV, doc.getStartXref());
+            } else {
+                trailer.removeItem(COSName.PREV);
+            }
+            pdfxRefStream.addTrailerInfo(trailer);
+            // the size is the highest object number+1. we add one more
+            // for the xref stream object we are going to write
+            pdfxRefStream.setSize(getNumber() + 2);
+
+            setStartxref(getStandardOutput().getPos());
+            COSStream stream2 = pdfxRefStream.getStream();
+            doWriteObject(stream2);
+        }
+
+        if (!doc.isXRefStream() || hybridPrev != -1) {
+            COSDictionary trailer = doc.getTrailer();
+            trailer.setLong(COSName.PREV, doc.getStartXref());
+            if (hybridPrev != -1) {
+                COSName xrefStm = COSName.XREF_STM;
+                trailer.removeItem(xrefStm);
+                trailer.setLong(xrefStm, getStartxref());
+            }
+            doWriteXRefTable();
+            doWriteTrailer(doc);
+        }
+    }
+
+    // writes the "xref" table
+    private void doWriteXRefTable() throws IOException {
+        addXRefEntry(COSWriterXRefEntry.getNullEntry());
+
+        // sort xref, needed only if object keys not regenerated
+        Collections.sort(getXRefEntries());
+
+        // remember the position where x ref was written
+        setStartxref(getStandardOutput().getPos());
+
+        getStandardOutput().write(XREF);
+        getStandardOutput().writeEOL();
+        // write start object number and object count for this x ref section
+        // we assume starting from scratch
+
+        Long[] xRefRanges = getXRefRanges(getXRefEntries());
+        int xRefLength = xRefRanges.length;
+        int x = 0;
+        int j = 0;
+        while (x < xRefLength && (xRefLength % 2) == 0) {
+            writeXrefRange(xRefRanges[x], xRefRanges[x + 1]);
+
+            for (int i = 0; i < xRefRanges[x + 1]; ++i) {
+                writeXrefEntry(xRefEntries.get(j++));
+            }
+            x += 2;
+        }
+    }
+
+    /**
+     * Writes an incremental update for the non-signature case: the existing PDF is
+     * copied to the incremental output, followed by the bytes buffered in the output
+     * stream. This can be used for e.g. augmenting signatures.
+     *
+     * @throws IOException if reading the existing PDF or writing the result fails
+     */
+    private void doWriteIncrement() throws IOException {
+        // the existing PDF comes first
+        InputStream existingPdf = new RandomAccessInputStream(incrementalInput);
+        IOUtils.copy(existingPdf, incrementalOutput);
+        // then the update that was buffered in memory
+        byte[] update = ((ByteArrayOutputStream) output).toByteArray();
+        incrementalOutput.write(update);
+    }
+
+    private void doWriteSignature() throws IOException {
+        // calculate the ByteRange values
+        final long originalLength = incrementalInput.length();
+        final long lengthBefore = signatureOffset;
+        final long offsetAfter = signatureOffset + signatureLength;
+        final long lengthAfter = getStandardOutput().getPos()
+                - (originalLength + signatureLength) - (signatureOffset - originalLength);
+
+        final String byteRange = "0 " + lengthBefore + " " + offsetAfter + " " + lengthAfter + "]";
+
+        // Assign the values to the actual COSArray, so that the user can access it before closing
+        byteRangeArray.set(0, COSInteger.ZERO);
+        byteRangeArray.set(1, COSInteger.get(lengthBefore));
+        byteRangeArray.set(2, COSInteger.get(offsetAfter));
+        byteRangeArray.set(3, COSInteger.get(lengthAfter));
+
+        if (byteRange.length() > byteRangeLength) {
+            throw new IOException("Can't write new byteRange '" + byteRange +
+                    "' not enough space: byteRange.length(): " + byteRange.length() +
+                    ", byteRangeLength: " + byteRangeLength);
+        }
+
+        // copy the new incremental data into a buffer (e.g. signature dict, trailer)
+        final ByteArrayOutputStream buffered = (ByteArrayOutputStream) output;
+        buffered.flush();
+        incrementPart = buffered.toByteArray();
+
+        // overwrite the ByteRange placeholder in the buffer, padding the rest with spaces
+        final byte[] rangeBytes = byteRange.getBytes(StandardCharsets.ISO_8859_1);
+        for (int i = 0; i < byteRangeLength; i++) {
+            final int target = (int) (byteRangeOffset + i - originalLength);
+            incrementPart[target] = (i >= rangeBytes.length) ? 0x20 : rangeBytes[i];
+        }
+
+        // without a signature interface the signature is created externally
+        // and set via writeExternalSignature()
+        if (signatureInterface != null) {
+            final InputStream toSign = getDataToSign();
+            writeExternalSignature(signatureInterface.sign(toSign));
+        }
+    }
+
+    /**
+     * Returns the PDF data that has to be signed: the original input followed by the
+     * incremental part with the signature contents left out. Clients use this method
+     * only to create signatures externally, and pass the result to
+     * {@link #writeExternalSignature(byte[])}.
+     * <p>
+     * When a {@link SignatureInterface} instance is used, the writer obtains and writes
+     * the signature itself.
+     * </p>
+     *
+     * @return data stream to be signed
+     * @throws IllegalStateException if PDF is not prepared for external signing
+     * @throws IOException           if input data is closed
+     */
+    public InputStream getDataToSign() throws IOException {
+        if (incrementPart == null || incrementalInput == null) {
+            throw new IllegalStateException("PDF not prepared for signing");
+        }
+        // position of the signature contents relative to the start of the incremental part
+        final int sigStart = (int) (signatureOffset - incrementalInput.length());
+        final int sigEnd = sigStart + (int) signatureLength;
+        // two (offset, length) pairs: the bytes before and the bytes after the signature
+        final int[] signedRanges = {0, sigStart, sigEnd, incrementPart.length - sigEnd};
+
+        final InputStream originalPdf = new RandomAccessInputStream(incrementalInput);
+        final InputStream filteredIncrement = new COSFilterInputStream(incrementPart, signedRanges);
+        return new SequenceInputStream(originalPdf, filteredIncrement);
+    }
+
+    /**
+     * Writes an externally created signature for the data obtained via
+     * {@link #getDataToSign()}. The hex-encoded signature is copied into the buffered
+     * incremental part, then the original input and that part are written to the
+     * incremental output. Afterwards the buffered part is discarded.
+     *
+     * @param cmsSignature CMS signature byte array
+     * @throws IllegalStateException if PDF is not prepared for external signing
+     * @throws IOException           if the signature does not fit or the source data stream is closed
+     */
+    public void writeExternalSignature(byte[] cmsSignature) throws IOException {
+        if (incrementPart == null || incrementalInput == null) {
+            throw new IllegalStateException("PDF not prepared for setting signature");
+        }
+        final byte[] hexSignature = Hex.getBytes(cmsSignature);
+
+        // two bytes of the reserved space are taken by the enclosing "<>"
+        final long capacity = signatureLength - 2;
+        if (hexSignature.length > capacity) {
+            throw new IOException("Can't write signature, not enough space");
+        }
+
+        // overwrite the signature contents in the buffer, just after the opening "<"
+        final int sigStart = (int) (signatureOffset - incrementalInput.length());
+        System.arraycopy(hexSignature, 0, incrementPart, sigStart + 1, hexSignature.length);
+
+        // original PDF first, then the patched incremental part
+        final InputStream originalPdf = new RandomAccessInputStream(incrementalInput);
+        IOUtils.copy(originalPdf, incrementalOutput);
+        incrementalOutput.write(incrementPart);
+
+        // prevent further use
+        incrementPart = null;
+    }
+
+    private void writeXrefRange(long x, long y) throws IOException {
+        getStandardOutput().write(String.valueOf(x).getBytes(StandardCharsets.ISO_8859_1));
+        getStandardOutput().write(SPACE);
+        getStandardOutput().write(String.valueOf(y).getBytes(StandardCharsets.ISO_8859_1));
+        getStandardOutput().writeEOL();
+    }
+
+    private void writeXrefEntry(COSWriterXRefEntry entry) throws IOException {
+        String offset = formatXrefOffset.format(entry.getOffset());
+        String generation = formatXrefGeneration.format(entry.getKey().getGeneration());
+        getStandardOutput().write(offset.getBytes(StandardCharsets.ISO_8859_1));
+        getStandardOutput().write(SPACE);
+        getStandardOutput().write(generation.getBytes(StandardCharsets.ISO_8859_1));
+        getStandardOutput().write(SPACE);
+        getStandardOutput().write(entry.isFree() ? XREF_FREE : XREF_USED);
+        getStandardOutput().writeCRLF();
+    }
+
+    /**
+     * Groups the object numbers of the given xref entries into contiguous ranges. The
+     * format of the returned array is exactly the same as in the pdf specification. See
+     * section 7.5.4 of ISO32000-1:2008, example 1 (page 40) for reference.
+     * <p>
+     * example: 0 1 2 5 6 7 8 10
+     * <p>
+     * will create an array with the following ranges
+     * <p>
+     * 0 3 5 4 10 1
+     * <p>
+     * this means that the element 0 is followed by two other related numbers
+     * that represent a cluster of the size 3. 5 is followed by three other
+     * related numbers and creates a cluster of size 4. etc.
+     * <p>
+     * Object numbers are handled as {@code long} throughout, so numbers above
+     * {@link Integer#MAX_VALUE} are not truncated. The entries are expected to be
+     * ordered by object number.
+     *
+     * @param xRefEntriesList list with the xRef entries that were written
+     * @return a {@code Long} array of (first object number, count) pairs; empty when the
+     * list is empty
+     */
+    protected Long[] getXRefRanges(List<COSWriterXRefEntry> xRefEntriesList) {
+        // -2 marks "no entry seen yet"
+        long last = -2;
+        long count = 1;
+
+        List<Long> list = new ArrayList<>();
+        for (COSWriterXRefEntry entry : xRefEntriesList) {
+            long nr = entry.getKey().getNumber();
+            if (nr == last + 1) {
+                // still inside the current contiguous range
+                ++count;
+                last = nr;
+            } else if (last == -2) {
+                last = nr;
+            } else {
+                // gap found: close the current range and start a new one
+                list.add(last - count + 1);
+                list.add(count);
+                last = nr;
+                count = 1;
+            }
+        }
+        // If no new entry is found, we need to write out the last result
+        if (!xRefEntriesList.isEmpty()) {
+            list.add(last - count + 1);
+            list.add(count);
+        }
+        return list.toArray(new Long[0]);
+    }
+
+    /**
+     * This will get the object key for the object.
+     *
+     * @param obj The object to get the key for.
+     * @return The object key for the object.
+     */
+    private COSObjectKey getObjectKey(COSBase obj) {
+        COSBase actual = obj;
+        if (actual instanceof COSObject) {
+            actual = ((COSObject) obj).getObject();
+        }
+        // PDFBOX-4540: because objectKeys is accessible from outside, it is possible
+        // that a COSObject obj is already in the objectKeys map.
+        COSObjectKey key = objectKeys.get(obj);
+        if (key == null && actual != null) {
+            key = objectKeys.get(actual);
+        }
+        if (key == null) {
+            setNumber(getNumber() + 1);
+            key = new COSObjectKey(getNumber(), 0);
+            objectKeys.put(obj, key);
+            if (actual != null) {
+                objectKeys.put(actual, key);
+            }
+        }
+        return key;
+    }
+
+    @Override
+    public Object visitFromArray(COSArray obj) throws IOException {
+        getStandardOutput().write(ARRAY_OPEN);
+        final Iterator<COSBase> elements = obj.iterator();
+        int written = 0;
+        while (elements.hasNext()) {
+            final COSBase element = elements.next();
+            if (element == null) {
+                COSNull.NULL.accept(this);
+            } else if (element instanceof COSDictionary) {
+                if (element.isDirect()) {
+                    visitFromDictionary((COSDictionary) element);
+                } else {
+                    addObjectToWrite(element);
+                    writeReference(element);
+                }
+            } else if (element instanceof COSObject) {
+                final COSBase target = ((COSObject) element).getObject();
+                // PDFBOX-4308: added willEncrypt to prevent an object
+                // that is referenced several times from being written
+                // direct and indirect, thus getting encrypted
+                // with wrong object number or getting encrypted twice
+                final boolean byReference = willEncrypt || incrementalUpdate
+                        || target instanceof COSDictionary || target == null;
+                if (byReference) {
+                    addObjectToWrite(element);
+                    writeReference(element);
+                } else {
+                    target.accept(this);
+                }
+            } else {
+                element.accept(this);
+            }
+            written++;
+            // separate elements with a space, breaking the line after every tenth one
+            if (elements.hasNext()) {
+                if (written % 10 == 0) {
+                    getStandardOutput().writeEOL();
+                } else {
+                    getStandardOutput().write(SPACE);
+                }
+            }
+        }
+        getStandardOutput().write(ARRAY_CLOSE);
+        getStandardOutput().writeEOL();
+        return null;
+    }
+
+    @Override
+    public Object visitFromBoolean(COSBoolean obj) throws IOException {
+        // the boolean serializes itself to the standard output
+        final COSStandardOutputStream out = getStandardOutput();
+        obj.writePDF(out);
+        return null;
+    }
+
+    @Override
+    public Object visitFromDictionary(COSDictionary obj) throws IOException {
+        if (!reachedSignature) {
+            final COSBase type = obj.getItem(COSName.TYPE);
+            reachedSignature = COSName.SIG.equals(type) || COSName.DOC_TIME_STAMP.equals(type);
+        }
+        getStandardOutput().write(DICT_OPEN);
+        getStandardOutput().writeEOL();
+        for (Map.Entry<COSName, COSBase> entry : obj.entrySet()) {
+            final COSName key = entry.getKey();
+            final COSBase value = entry.getValue();
+            if (value == null) {
+                // there are a couple of cases where the value of an entry is a
+                // dangling reference that points to nothing; such an entry is not written
+                continue;
+            }
+
+            key.accept(this);
+            getStandardOutput().write(SPACE);
+
+            if (value instanceof COSDictionary) {
+                final COSDictionary dict = (COSDictionary) value;
+
+                if (!incrementalUpdate) {
+                    // write all XObjects as direct objects, this will save some size
+                    // PDFBOX-3684: but avoid dictionary that references itself
+                    for (COSName inlined : new COSName[] {COSName.XOBJECT, COSName.RESOURCES}) {
+                        final COSBase child = dict.getItem(inlined);
+                        if (child != null && !inlined.equals(key)) {
+                            child.setDirect(true);
+                        }
+                    }
+                }
+
+                if (dict.isDirect()) {
+                    // a direct dictionary is written in place by visiting it again
+                    visitFromDictionary(dict);
+                } else {
+                    addObjectToWrite(dict);
+                    writeReference(dict);
+                }
+            } else if (value instanceof COSObject) {
+                final COSBase target = ((COSObject) value).getObject();
+                // PDFBOX-4308: added willEncrypt to prevent an object
+                // that is referenced several times from being written
+                // direct and indirect, thus getting encrypted
+                // with wrong object number or getting encrypted twice
+                final boolean byReference = willEncrypt || incrementalUpdate
+                        || target instanceof COSDictionary || target == null;
+                if (byReference) {
+                    addObjectToWrite(value);
+                    writeReference(value);
+                } else {
+                    target.accept(this);
+                }
+            } else if (reachedSignature && COSName.CONTENTS.equals(key)) {
+                // inside a signature: remember where the contents are written
+                signatureOffset = getStandardOutput().getPos();
+                value.accept(this);
+                signatureLength = getStandardOutput().getPos() - signatureOffset;
+            } else if (reachedSignature && COSName.BYTERANGE.equals(key)) {
+                // inside a signature: remember where the byte range is written
+                byteRangeArray = (COSArray) value;
+                byteRangeOffset = getStandardOutput().getPos() + 1;
+                value.accept(this);
+                byteRangeLength = getStandardOutput().getPos() - 1 - byteRangeOffset;
+                reachedSignature = false;
+            } else {
+                value.accept(this);
+            }
+            getStandardOutput().writeEOL();
+        }
+        getStandardOutput().write(DICT_CLOSE);
+        getStandardOutput().writeEOL();
+        return null;
+    }
+
+    @Override
+    public Object visitFromDocument(COSDocument doc) throws IOException {
+        if (incrementalUpdate) {
+            // Sometimes the original file will be missing a newline at the end
+            // In order to avoid having %%EOF the first object on the same line
+            // as the %%EOF, we put a newline here. If there's already one at
+            // the end of the file, an extra one won't hurt. PDFBOX-1051
+            getStandardOutput().writeCRLF();
+        } else {
+            doWriteHeader(doc);
+        }
+
+        doWriteBody(doc);
+
+        // the XRefStm value of the previous trailer, or -1 when there is no trailer
+        final COSDictionary previousTrailer = doc.getTrailer();
+        final long hybridPrev = (previousTrailer != null)
+                ? previousTrailer.getLong(COSName.XREF_STM)
+                : -1;
+
+        if (incrementalUpdate || doc.isXRefStream()) {
+            doWriteXRefInc(doc, hybridPrev);
+        } else {
+            doWriteXRefTable();
+            doWriteTrailer(doc);
+        }
+
+        // file tail: startxref keyword, xref position, end-of-file marker
+        final byte[] xrefPosition = String.valueOf(getStartxref()).getBytes(StandardCharsets.ISO_8859_1);
+        getStandardOutput().write(STARTXREF);
+        getStandardOutput().writeEOL();
+        getStandardOutput().write(xrefPosition);
+        getStandardOutput().writeEOL();
+        getStandardOutput().write(EOF);
+        getStandardOutput().writeEOL();
+
+        if (incrementalUpdate) {
+            // no signature position was recorded: plain incremental update
+            final boolean unsigned = signatureOffset == 0 || byteRangeOffset == 0;
+            if (unsigned) {
+                doWriteIncrement();
+            } else {
+                doWriteSignature();
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Writes a float object by handing the standard output to {@code obj.writePDF(...)}.
+     *
+     * @param obj the float being visited
+     * @return always {@code null}
+     * @throws IOException if writing to the output fails
+     */
+    @Override
+    public Object visitFromFloat(COSFloat obj) throws IOException {
+        final OutputStream target = getStandardOutput();
+        obj.writePDF(target);
+        return null;
+    }
+
+    /**
+     * Writes an integer object by handing the standard output to {@code obj.writePDF(...)}.
+     *
+     * @param obj the integer being visited
+     * @return always {@code null}
+     * @throws IOException if writing to the output fails
+     */
+    @Override
+    public Object visitFromInt(COSInteger obj) throws IOException {
+        final OutputStream target = getStandardOutput();
+        obj.writePDF(target);
+        return null;
+    }
+
+    /**
+     * Writes a name object by handing the standard output to {@code obj.writePDF(...)}.
+     *
+     * @param obj the name being visited
+     * @return always {@code null}
+     * @throws IOException if writing to the output fails
+     */
+    @Override
+    public Object visitFromName(COSName obj) throws IOException {
+        final OutputStream target = getStandardOutput();
+        obj.writePDF(target);
+        return null;
+    }
+
+    /**
+     * Writes a null object by handing the standard output to {@code obj.writePDF(...)}.
+     *
+     * @param obj the null object being visited
+     * @return always {@code null}
+     * @throws IOException if writing to the output fails
+     */
+    @Override
+    public Object visitFromNull(COSNull obj) throws IOException {
+        final OutputStream target = getStandardOutput();
+        obj.writePDF(target);
+        return null;
+    }
+
+    /**
+     * Writes a reference to the given object: the number and the generation of the
+     * key returned by {@code getObjectKey(obj)}, each encoded as ISO-8859-1 decimal
+     * text and followed by {@code SPACE}, then the {@code REFERENCE} marker.
+     *
+     * @param obj The object whose reference is written.
+     * @throws IOException If there is an exception while writing the reference.
+     */
+    public void writeReference(COSBase obj) throws IOException {
+        final COSObjectKey objectKey = getObjectKey(obj);
+
+        final byte[] numberText = String.valueOf(objectKey.getNumber()).getBytes(StandardCharsets.ISO_8859_1);
+        getStandardOutput().write(numberText);
+        getStandardOutput().write(SPACE);
+
+        final byte[] generationText =
+                String.valueOf(objectKey.getGeneration()).getBytes(StandardCharsets.ISO_8859_1);
+        getStandardOutput().write(generationText);
+        getStandardOutput().write(SPACE);
+
+        getStandardOutput().write(REFERENCE);
+    }
+
+    /**
+     * Writes a stream object: its dictionary, the {@code STREAM} keyword, the raw
+     * (still encoded) stream bytes and the {@code ENDSTREAM} keyword.
+     * <p>
+     * When {@code willEncrypt} is set, the stream is first passed to the document's
+     * security handler together with the number and generation of the current
+     * object key.
+     *
+     * @param obj the stream being visited
+     * @return always {@code null}
+     * @throws IOException if encrypting, reading the raw stream or writing fails
+     */
+    @Override
+    public Object visitFromStream(COSStream obj) throws IOException {
+        if (willEncrypt) {
+            pdDocument.getEncryption().getSecurityHandler()
+                    .encryptStream(obj, currentObjectKey.getNumber(), currentObjectKey.getGeneration());
+        }
+
+        // write the stream dictionary and the opening keyword
+        visitFromDictionary(obj);
+        getStandardOutput().write(STREAM);
+        getStandardOutput().writeCRLF();
+
+        // try-with-resources closes the raw stream on every path and, unlike a manual
+        // finally block, does not let a failing close() mask an earlier exception
+        try (InputStream input = obj.createRawInputStream()) {
+            IOUtils.copy(input, getStandardOutput());
+
+            getStandardOutput().writeCRLF();
+            getStandardOutput().write(ENDSTREAM);
+            getStandardOutput().writeEOL();
+        }
+        return null;
+    }
+
+    @Override
+    public Object visitFromString(COSString obj) throws IOException {
+        if (willEncrypt) {
+            pdDocument.getEncryption().getSecurityHandler().encryptString(
+                    obj,
+                    currentObjectKey.getNumber(),
+                    currentObjectKey.getGeneration());
+        }
+        COSWriter.writeString(obj, getStandardOutput());
+        return null;
+    }
+
+    /**
+     * Writes the given COS document by wrapping it in a new {@link PDDocument} and
+     * delegating to {@link #write(PDDocument)}.
+     *
+     * @param doc The document to write.
+     * @throws IOException If an error occurs while generating the data.
+     */
+    public void write(COSDocument doc) throws IOException {
+        write(new PDDocument(doc));
+    }
+
+    /**
+     * Writes the pdf document without a signing callback. If the signature is to be
+     * created externally, call {@link #writeExternalSignature(byte[])} after this
+     * method to set it.
+     *
+     * @param doc The document to write.
+     * @throws IOException If an error occurs while generating the data.
+     */
+    public void write(PDDocument doc) throws IOException {
+        final SignatureInterface noSigner = null;
+        write(doc, noSigner);
+    }
+
+    /**
+     * This will write the pdf document. If signature should be created externally,
+     * {@link #writeExternalSignature(byte[])} should be invoked to set signature after calling this method.
+     *
+     * @param doc           The document to write.
+     * @param signInterface class to be used for signing; {@code null} if external signing would be performed
+     *                      or there will be no signing at all
+     * @throws IOException           If an error occurs while generating the data.
+     * @throws IllegalStateException If the document has an encryption dictionary but no protection
+     *                               policy.
+     */
+    public void write(PDDocument doc, SignatureInterface signInterface) throws IOException {
+        Long idTime = doc.getDocumentId() == null ? System.currentTimeMillis() :
+                doc.getDocumentId();
+
+        pdDocument = doc;
+        signatureInterface = signInterface;
+
+        if (incrementalUpdate) {
+            prepareIncrement(doc);
+        }
+
+        // if the document says we should remove encryption, then we shouldn't encrypt
+        if (doc.isAllSecurityToBeRemoved()) {
+            willEncrypt = false;
+            // also need to get rid of the "Encrypt" in the trailer so readers
+            // don't try to decrypt a document which is not encrypted
+            COSDocument cosDoc = doc.getDocument();
+            COSDictionary trailer = cosDoc.getTrailer();
+            trailer.removeItem(COSName.ENCRYPT);
+        } else {
+            if (pdDocument.getEncryption() != null) {
+                if (!incrementalUpdate) {
+                    SecurityHandler securityHandler = pdDocument.getEncryption().getSecurityHandler();
+                    if (!securityHandler.hasProtectionPolicy()) {
+                        throw new IllegalStateException("PDF contains an encryption dictionary, please remove it with "
+                                + "setAllSecurityToBeRemoved() or set a protection policy with protect()");
+                    }
+                    securityHandler.prepareDocumentForEncryption(pdDocument);
+                }
+                willEncrypt = true;
+            } else {
+                willEncrypt = false;
+            }
+        }
+
+        COSDocument cosDoc = pdDocument.getDocument();
+        COSDictionary trailer = cosDoc.getTrailer();
+        COSArray idArray;
+        boolean missingID = true;
+        COSBase base = trailer.getDictionaryObject(COSName.ID);
+        if (base instanceof COSArray) {
+            idArray = (COSArray) base;
+            if (idArray.size() == 2) {
+                missingID = false;
+            }
+        } else {
+            idArray = new COSArray();
+        }
+        if (missingID || incrementalUpdate) {
+            MessageDigest md5;
+            try {
+                md5 = MessageDigest.getInstance("MD5");
+            } catch (NoSuchAlgorithmException e) {
+                // should never happen
+                throw new RuntimeException(e);
+            }
+
+            // algorithm says to use time/path/size/values in doc to generate the id.
+            // we don't have path or size, so do the best we can
+            md5.update(Long.toString(idTime).getBytes(StandardCharsets.ISO_8859_1));
+
+            COSDictionary info = trailer.getCOSDictionary(COSName.INFO);
+            if (info != null) {
+                for (COSBase cosBase : info.getValues()) {
+                    md5.update(cosBase.toString().getBytes(StandardCharsets.ISO_8859_1));
+                }
+            }
+            // reuse origin documentID if available as first value
+            COSString firstID = missingID ? new COSString(md5.digest()) : (COSString) idArray.get(0);
+            // it's ok to use the same ID for the second part if the ID is created for the first time
+            COSString secondID = missingID ? firstID : new COSString(md5.digest());
+            idArray = new COSArray();
+            idArray.add(firstID);
+            idArray.add(secondID);
+            trailer.setItem(COSName.ID, idArray);
+        }
+        cosDoc.accept(this);
+    }
+
+    /**
+     * Writes the fdf document. Encryption is switched off, the document is stored
+     * in {@code fdfDocument} and its COS document is visited by this writer.
+     *
+     * @param doc The document to write.
+     * @throws IOException If an error occurs while generating the data.
+     */
+    public void write(FDFDocument doc) throws IOException {
+        willEncrypt = false;
+        fdfDocument = doc;
+        fdfDocument.getDocument().accept(this);
+    }
+
+    /**
+     * Writes the given COSString as a PDF string object, using hex form when the
+     * string's {@code getForceHexForm()} flag is set.
+     *
+     * @param string COSString to be written
+     * @param output The stream to write to.
+     * @throws IOException If there is an error writing to the stream.
+     */
+    public static void writeString(COSString string, OutputStream output) throws IOException {
+        final byte[] rawBytes = string.getBytes();
+        final boolean hexForm = string.getForceHexForm();
+        writeString(rawBytes, hexForm, output);
+    }
+
+    /**
+     * Writes the given bytes as a PDF string object without forcing hex form.
+     *
+     * @param bytes  byte array representation of a string to be written
+     * @param output The stream to write to.
+     * @throws IOException If there is an error writing to the stream.
+     */
+    public static void writeString(byte[] bytes, OutputStream output) throws IOException {
+        final boolean forceHex = false;
+        writeString(bytes, forceHex, output);
+    }
+
+    /**
+     * Writes the given bytes as a PDF string object, either in literal form
+     * ({@code (...)}) or in hex form ({@code <...>}).
+     * <p>
+     * Literal form is used only when hex form is not forced and no byte is negative
+     * (i.e. outside the ASCII range) or a CR/LF. In literal form, parentheses and
+     * backslashes are preceded by a backslash.
+     *
+     * @param bytes    byte array representation of a string to be written
+     * @param forceHex {@code true} to write hex form regardless of the content
+     * @param output   The stream to write to.
+     * @throws IOException If there is an error writing to the stream.
+     */
+    private static void writeString(byte[] bytes, boolean forceHex, OutputStream output)
+            throws IOException {
+        // the content is only scanned when hex form was not requested up front
+        boolean literal = !forceHex;
+        if (literal) {
+            for (byte current : bytes) {
+                // a negative byte is an eight bit value outside the ASCII range;
+                // PDFBOX-3107: EOL markers within a string are troublesome
+                if (current < 0 || current == 0x0d || current == 0x0a) {
+                    literal = false;
+                    break;
+                }
+            }
+        }
+
+        if (!literal) {
+            // write hex string
+            output.write('<');
+            Hex.writeHexBytes(bytes, output);
+            output.write('>');
+            return;
+        }
+
+        // write ASCII string
+        output.write('(');
+        for (byte current : bytes) {
+            if (current == '(' || current == ')' || current == '\\') {
+                output.write('\\');
+            }
+            output.write(current);
+        }
+        output.write(')');
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformer.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformer.java
new file mode 100644
index 0000000..ab7fa11
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformer.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.pdf;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.fuzzing.Transformer;
+import org.apache.tika.fuzzing.exceptions.CantFuzzException;
+import org.apache.tika.mime.MediaType;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Collections;
+import java.util.Set;
+
+public class PDFTransformer implements Transformer {
+    private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.application("pdf"));
+    @Override
+    public Set<MediaType> getSupportedTypes() {
+        return SUPPORTED_TYPES;
+    }
+
+    private PDFTransformerConfig config = new PDFTransformerConfig();
+
+    @Override
+    public void transform(InputStream is, OutputStream os) throws IOException, TikaException {
+        try (PDDocument pdDocument = PDDocument.load(is)) {
+            try (EvilCOSWriter cosWriter = new EvilCOSWriter(os, config)) {
+                cosWriter.write(pdDocument);
+            }
+        } catch (InvalidPasswordException e) {
+            throw new CantFuzzException("encrypted doc");
+        }
+    }
+}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformerConfig.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformerConfig.java
new file mode 100644
index 0000000..d152878
--- /dev/null
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformerConfig.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fuzzing.pdf;
+
+/**
+ * Settings holder for the PDF transformer. It currently exposes a single
+ * read-only flag.
+ */
+public class PDFTransformerConfig {
+
+    // defaults to true; NOTE(review): how the writer uses this flag is not visible here
+    private boolean randomizeObjectNumbers = true;
+
+    /**
+     * @return the value of the {@code randomizeObjectNumbers} flag ({@code true} by default)
+     */
+    public boolean getRandomizeObjectNumbers() {
+        return this.randomizeObjectNumbers;
+    }
+}
diff --git a/tika-fuzzing/src/main/resources/META-INF/services/org.apache.tika.fuzzing.Transformer b/tika-fuzzing/src/main/resources/META-INF/services/org.apache.tika.fuzzing.Transformer
new file mode 100644
index 0000000..07390de
--- /dev/null
+++ b/tika-fuzzing/src/main/resources/META-INF/services/org.apache.tika.fuzzing.Transformer
@@ -0,0 +1,17 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+org.apache.tika.fuzzing.general.GeneralTransformer
+#org.apache.tika.fuzzing.pdf.PDFTransformer
\ No newline at end of file
diff --git a/tika-fuzzing/src/main/resources/log4j.properties b/tika-fuzzing/src/main/resources/log4j.properties
new file mode 100644
index 0000000..7d3b372
--- /dev/null
+++ b/tika-fuzzing/src/main/resources/log4j.properties
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#info,debug, error,fatal ...
+log4j.rootLogger=info,stderr
+
+#console
+log4j.appender.stderr=org.apache.log4j.ConsoleAppender
+log4j.appender.stderr.layout=org.apache.log4j.PatternLayout
+log4j.appender.stderr.Target=System.err
+
+log4j.appender.stderr.layout.ConversionPattern= %-5p %m%n
diff --git a/tika-fuzzing/src/test/java/TestFuzzingCLI.java b/tika-fuzzing/src/test/java/TestFuzzingCLI.java
new file mode 100644
index 0000000..a98291b
--- /dev/null
+++ b/tika-fuzzing/src/test/java/TestFuzzingCLI.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import org.apache.commons.io.FileUtils;
+import org.apache.tika.fuzzing.cli.FuzzingCLI;
+import org.apache.tika.utils.ProcessUtils;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+/**
+ * Manual smoke tests that invoke {@link FuzzingCLI#main(String[])}. Both are
+ * {@code @Ignore}d and still need to be converted to actual unit tests.
+ */
+public class TestFuzzingCLI {
+
+    /**
+     * Runs the CLI against directories that have to be filled in by hand.
+     */
+    @Test
+    @Ignore
+    public void testBasic() throws Exception {
+        //convert to actual unit test
+        String inputDir = "";// fill in
+        String outputDir = "";//fill in
+        // NOTE(review): the meaning of "-r" is not documented in this test
+        String[] args = {
+                "-i", inputDir,
+                "-o", outputDir,
+                "-n", "8", // num threads
+                "-t", "1", //max transformers
+                "-p", "100", //per file iterations
+                "-r", "3"
+        };
+        FuzzingCLI.main(args);
+    }
+
+    /**
+     * Runs the CLI over the bundled {@code /test-documents} resources, writing to a
+     * temporary directory that is deleted afterwards.
+     */
+    @Test
+    @Ignore
+    public void testMock() throws Exception {
+        //convert to actual unit test
+        Path inputDir = Paths.get(getClass().getResource("/test-documents").toURI());
+        Path outputDir = Files.createTempDirectory("tika-fuzzing-");
+        String escapedInput = ProcessUtils.escapeCommandLine(inputDir.toAbsolutePath().toString());
+        String escapedOutput = ProcessUtils.escapeCommandLine(outputDir.toAbsolutePath().toString());
+        // NOTE(review): the meaning of "-r" is not documented in this test
+        String[] args = {
+                "-i", escapedInput,
+                "-o", escapedOutput,
+                "-n", "8", // num threads
+                "-t", "0", //max transformers
+                "-p", "10", //per file iterations
+                "-m", "10000", //max ms per file
+                "-r", "3"
+        };
+        try {
+            FuzzingCLI.main(args);
+        } finally {
+            // always remove the temporary output directory
+            FileUtils.deleteDirectory(outputDir.toFile());
+        }
+    }
+}
diff --git a/tika-fuzzing/src/test/java/TestTransformer.java b/tika-fuzzing/src/test/java/TestTransformer.java
new file mode 100644
index 0000000..1db2e1e
--- /dev/null
+++ b/tika-fuzzing/src/test/java/TestTransformer.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import org.apache.tika.fuzzing.general.GeneralTransformer;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Arrays;
+
+/**
+ * Manual check of {@link GeneralTransformer}; {@code @Ignore}d until it is turned
+ * into an actual unit test.
+ */
+public class TestTransformer {
+
+    /**
+     * Transforms the same input 100 times and prints {@code SAME} whenever the
+     * output bytes equal the input bytes.
+     */
+    @Test
+    @Ignore
+    public void testBasic() throws Exception {
+        //turn into actual unit test
+        Path path = Paths.get("");//put something meaningful here
+
+        GeneralTransformer transformer = new GeneralTransformer();
+        byte[] original = Files.readAllBytes(path);
+
+        for (int attempt = 0; attempt < 100; attempt++) {
+            ByteArrayOutputStream fuzzed = new ByteArrayOutputStream();
+            transformer.transform(new ByteArrayInputStream(original), fuzzed);
+
+            boolean unchanged = Arrays.equals(fuzzed.toByteArray(), original);
+            if (unchanged) {
+                System.out.println("SAME");
+            }
+        }
+    }
+}
diff --git a/tika-fuzzing/src/test/resources/test-documents/heavy_hang.xml b/tika-fuzzing/src/test/resources/test-documents/heavy_hang.xml
new file mode 100644
index 0000000..f1f5b67
--- /dev/null
+++ b/tika-fuzzing/src/test/resources/test-documents/heavy_hang.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+<mock>
+    <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
+    <write element="p">some content</write>
+    <hang millis="30000" heavy="true" pulse_millis="100" />
+</mock>
\ No newline at end of file
diff --git a/tika-fuzzing/src/test/resources/test-documents/null_pointer.xml b/tika-fuzzing/src/test/resources/test-documents/null_pointer.xml
new file mode 100644
index 0000000..4561c3a
--- /dev/null
+++ b/tika-fuzzing/src/test/resources/test-documents/null_pointer.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+<mock>
+    <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
+    <write element="p">some content</write>
+    <throw class="java.lang.NullPointerException">another null pointer exception</throw>
+</mock>
\ No newline at end of file
diff --git a/tika-fuzzing/src/test/resources/test-documents/system_exit.xml b/tika-fuzzing/src/test/resources/test-documents/system_exit.xml
new file mode 100644
index 0000000..75d1d3b
--- /dev/null
+++ b/tika-fuzzing/src/test/resources/test-documents/system_exit.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+<mock>
+    <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
+    <write element="p">some content</write>
+    <system_exit />
+</mock>
\ No newline at end of file