You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/11/22 20:42:38 UTC
[tika] branch main updated: TIKA-3324 -- add checkstyle for fuzzing module
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 9d4d257 TIKA-3324 -- add checkstyle for fuzzing module
9d4d257 is described below
commit 9d4d25759a1e8afb5a1093005d8ef47baeebec1c
Author: tballison <ta...@apache.org>
AuthorDate: Mon Nov 22 15:42:31 2021 -0500
TIKA-3324 -- add checkstyle for fuzzing module
---
tika-fuzzing/pom.xml | 41 +-
.../apache/tika/fuzzing/AutoDetectTransformer.java | 16 +-
.../java/org/apache/tika/fuzzing/Transformer.java | 8 +-
.../java/org/apache/tika/fuzzing/cli/FuzzOne.java | 156 +++----
.../org/apache/tika/fuzzing/cli/FuzzingCLI.java | 75 ++--
.../apache/tika/fuzzing/cli/FuzzingCLIConfig.java | 129 ++----
.../apache/tika/fuzzing/general/ByteDeleter.java | 9 +-
.../apache/tika/fuzzing/general/ByteFlipper.java | 14 +-
.../apache/tika/fuzzing/general/ByteInjector.java | 13 +-
.../tika/fuzzing/general/GeneralTransformer.java | 31 +-
.../apache/tika/fuzzing/general/SpanSwapper.java | 20 +-
.../org/apache/tika/fuzzing/general/Truncator.java | 13 +-
.../org/apache/tika/fuzzing/pdf/EvilCOSWriter.java | 473 ++++++++++-----------
.../apache/tika/fuzzing/pdf/PDFTransformer.java | 20 +-
.../tika/fuzzing/pdf/PDFTransformerConfig.java | 64 +--
tika-fuzzing/src/test/java/TestFuzzingCLI.java | 31 +-
tika-fuzzing/src/test/java/TestTransformer.java | 11 +-
tika-fuzzing/src/test/resources/log4j.properties | 24 --
.../null_pointer.xml => log4j2.xml} | 21 +-
.../test/resources/test-documents/heavy_hang.xml | 6 +-
.../test/resources/test-documents/null_pointer.xml | 6 +-
.../test/resources/test-documents/system_exit.xml | 6 +-
22 files changed, 564 insertions(+), 623 deletions(-)
diff --git a/tika-fuzzing/pom.xml b/tika-fuzzing/pom.xml
index 5a75929..253119f 100644
--- a/tika-fuzzing/pom.xml
+++ b/tika-fuzzing/pom.xml
@@ -17,7 +17,8 @@
specific language governing permissions and limitations
under the License.
-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://maven.apache.org/POM/4.0.0"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parent</artifactId>
@@ -87,8 +88,38 @@
<scope>test</scope>
</dependency>
</dependencies>
-
- <scm>
- <tag>2.1.0-rc2</tag>
- </scm>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-checkstyle-plugin</artifactId>
+ <version>${checkstyle.plugin.version}</version>
+ <dependencies>
+ <dependency>
+ <groupId>com.puppycrawl.tools</groupId>
+ <artifactId>checkstyle</artifactId>
+ <version>${puppycrawl.version}</version>
+ </dependency>
+ </dependencies>
+ <executions>
+ <execution>
+ <id>validate</id>
+ <phase>validate</phase>
+ <configuration>
+ <configLocation>checkstyle.xml</configLocation>
+ <encoding>UTF-8</encoding>
+ <consoleOutput>false</consoleOutput>
+ <includeTestSourceDirectory>true</includeTestSourceDirectory>
+ <testSourceDirectories>${project.basedir}/src/test/java</testSourceDirectories>
+ <violationSeverity>error</violationSeverity>
+ <failOnViolation>true</failOnViolation>
+ </configuration>
+ <goals>
+ <goal>check</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
</project>
\ No newline at end of file
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/AutoDetectTransformer.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/AutoDetectTransformer.java
index f27f4a0..05bf5e2 100644
--- a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/AutoDetectTransformer.java
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/AutoDetectTransformer.java
@@ -16,6 +16,14 @@
*/
package org.apache.tika.fuzzing;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
import org.apache.tika.config.ServiceLoader;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
@@ -26,14 +34,6 @@ import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MediaTypeRegistry;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
public class AutoDetectTransformer implements Transformer {
private static final ServiceLoader DEFAULT_LOADER =
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/Transformer.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/Transformer.java
index 915c29c..57a710f 100644
--- a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/Transformer.java
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/Transformer.java
@@ -16,22 +16,22 @@
*/
package org.apache.tika.fuzzing;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.mime.MediaType;
-
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Set;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.mime.MediaType;
+
public interface Transformer {
/**
* Returns the set of media types supported by this parser when used
* with the given parse context.
*
- * @since Apache Tika 1.24.1
* @return immutable set of media types
+ * @since Apache Tika 1.24.1
*/
Set<MediaType> getSupportedTypes();
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzOne.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzOne.java
index 98c4438..af2351d 100644
--- a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzOne.java
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzOne.java
@@ -16,12 +16,32 @@
*/
package org.apache.tika.fuzzing.cli;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
import org.apache.tika.exception.TikaException;
import org.apache.tika.fuzzing.AutoDetectTransformer;
import org.apache.tika.fuzzing.Transformer;
@@ -31,25 +51,6 @@ import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.utils.ExceptionUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.xml.sax.SAXException;
-import org.xml.sax.helpers.DefaultHandler;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.TimeoutException;
/**
* Forked process that runs against a single input file
@@ -58,55 +59,30 @@ public class FuzzOne {
private static final Logger LOG = LoggerFactory.getLogger(FuzzOne.class);
static Options OPTIONS;
+
static {
//By the time this commandline is parsed, there should be both an extracts and an inputDir
Option extracts = new Option("extracts", true, "directory for extract files");
extracts.setRequired(true);
- OPTIONS = new Options()
- .addOption(Option.builder("i")
- .longOpt("inputFile")
- .desc("input directory for seed files")
- .hasArg(true)
- .required(true)
- .build())
- .addOption(Option.builder("o")
- .longOpt("outputFile")
- .desc("output file base")
- .hasArg(true)
- .required(true)
- .build())
- .addOption(Option.builder("m")
- .longOpt("timeoutMs")
- .desc("timeout in ms -- max time allowed to parse a file")
- .hasArg(true)
- .required(true)
- .build())
- .addOption(Option.builder("n")
- .desc("thread id (thread number)")
- .hasArg(true)
- .required(true)
- .build())
- .addOption(Option.builder("p")
- .longOpt("perFile")
- .desc("number of iterations to run per seed file")
- .hasArg(true)
- .required(true)
- .build())
- .addOption(Option.builder("t")
- .longOpt("maxTransformers")
- .desc("maximum number of transformers to run per iteration")
- .hasArg(true)
- .required(true)
- .build())
- .addOption(Option.builder("r")
- .longOpt("retryId")
- .desc("which retry is this")
- .hasArg(true)
- .required(true)
- .build());
+ OPTIONS = new Options().addOption(
+ Option.builder("i").longOpt("inputFile").desc("input directory for seed files")
+ .hasArg(true).required(true).build()).addOption(
+ Option.builder("o").longOpt("outputFile").desc("output file base").hasArg(true)
+ .required(true).build()).addOption(Option.builder("m").longOpt("timeoutMs")
+ .desc("timeout in ms -- max time allowed to parse a file").hasArg(true)
+ .required(true).build()).addOption(
+ Option.builder("n").desc("thread id (thread number)").hasArg(true).required(true)
+ .build()).addOption(Option.builder("p").longOpt("perFile")
+ .desc("number of iterations to run per seed file").hasArg(true).required(true)
+ .build()).addOption(Option.builder("t").longOpt("maxTransformers")
+ .desc("maximum number of transformers to run per iteration").hasArg(true)
+ .required(true).build()).addOption(
+ Option.builder("r").longOpt("retryId").desc("which retry is this").hasArg(true)
+ .required(true).build());
}
+
Parser parser = new AutoDetectParser();
public static void main(String[] args) throws Exception {
@@ -121,12 +97,12 @@ public class FuzzOne {
AutoDetectTransformer transformer = new AutoDetectTransformer();
for (int i = 0; i < config.perFileIterations; i++) {
try {
- String ext = "-"+config.threadNum + "-" + config.retryNum + "-"+i;
+ String ext = "-" + config.threadNum + "-" + config.retryNum + "-" + i;
fuzz(ext, src, targetDir, transformer, config.timeoutMs);
} catch (IOException e) {
LOG.warn("problem transforming file", e);
} catch (CantFuzzException e) {
- LOG.warn("can't fuzz this file "+src, e);
+ LOG.warn("can't fuzz this file " + src, e);
return;
} catch (TikaException e) {
e.printStackTrace();
@@ -134,11 +110,11 @@ public class FuzzOne {
}
}
- private void fuzz(String ext, Path src, Path targetFileBase,
- Transformer transformer, long timeoutMs) throws IOException, TikaException {
+ private void fuzz(String ext, Path src, Path targetFileBase, Transformer transformer,
+ long timeoutMs) throws IOException, TikaException {
- Path target = targetFileBase.getParent().resolve(
- targetFileBase.getFileName().toString() +ext);
+ Path target =
+ targetFileBase.getParent().resolve(targetFileBase.getFileName().toString() + ext);
try {
transformFile(transformer, src, target);
@@ -153,15 +129,15 @@ public class FuzzOne {
try {
int result = future.get(timeoutMs, TimeUnit.MILLISECONDS);
if (result == 1 && Files.exists(target)) {
- LOG.warn("failed to delete target: "+target);
+ LOG.warn("failed to delete target: " + target);
}
} catch (TimeoutException e) {
- LOG.warn("timeout exception:"+target);
+ LOG.warn("timeout exception:" + target);
future.cancel(true);
writeErrFile(target, ".timeout");
System.exit(1);
- } catch (InterruptedException|ExecutionException e) {
- LOG.warn("problem parsing "+target, e);
+ } catch (InterruptedException | ExecutionException e) {
+ LOG.warn("problem parsing " + target, e);
System.exit(1);
} finally {
executor.shutdownNow();
@@ -170,7 +146,7 @@ public class FuzzOne {
private void writeErrFile(Path target, String ext) {
try {
- Path err = target.getParent().resolve(target.getFileName().toString()+ext);
+ Path err = target.getParent().resolve(target.getFileName().toString() + ext);
Files.write(err, new byte[0]);
} catch (IOException e) {
LOG.warn("things aren't going right today.", e);
@@ -180,7 +156,8 @@ public class FuzzOne {
private void handleThrowable(Path target, Throwable t) {
try {
- Path errMsg = target.getParent().resolve(target.getFileName().toString()+".stacktrace");
+ Path errMsg =
+ target.getParent().resolve(target.getFileName().toString() + ".stacktrace");
Files.write(errMsg, ExceptionUtils.getStackTrace(t).getBytes(StandardCharsets.UTF_8));
} catch (IOException e) {
LOG.warn("things aren't going right today.", t);
@@ -188,14 +165,23 @@ public class FuzzOne {
}
- private void transformFile(Transformer transformer, Path src, Path target) throws IOException, TikaException {
- try (InputStream is = Files.newInputStream(src); OutputStream os =
- Files.newOutputStream(target)) {
+ private void transformFile(Transformer transformer, Path src, Path target)
+ throws IOException, TikaException {
+ try (InputStream is = Files.newInputStream(src);
+ OutputStream os = Files.newOutputStream(target)) {
transformer.transform(is, os);
}
}
private static class FuzzOneConfig {
+ int perFileIterations;
+ int maxTransformers;
+ int threadNum;
+ int retryNum;
+ long timeoutMs;
+ private Path inputFile;
+ private Path outputFileBase;
+
static FuzzOneConfig parse(String[] args) throws ParseException {
CommandLineParser parser = new DefaultParser();
CommandLine commandLine = parser.parse(OPTIONS, args);
@@ -210,24 +196,16 @@ public class FuzzOne {
return config;
}
- private Path inputFile;
- private Path outputFileBase;
- int perFileIterations;
- int maxTransformers;
- int threadNum;
- int retryNum;
- long timeoutMs;
-
}
private class ParseTask implements Callable<Integer> {
private final Path target;
+
public ParseTask(Path target) {
this.target = target;
}
/**
- *
* @return 1 if success
* @throws Exception
*/
@@ -235,7 +213,7 @@ public class FuzzOne {
public Integer call() throws Exception {
boolean success = false;
try (InputStream is = Files.newInputStream(target)) {
- LOG.debug("parsing "+target);
+ LOG.debug("parsing " + target);
parser.parse(is, new DefaultHandler(), new Metadata(), new ParseContext());
success = true;
} catch (TikaException e) {
@@ -245,7 +223,7 @@ public class FuzzOne {
} else {
success = true;
}
- } catch (SAXException|IOException e) {
+ } catch (SAXException | IOException e) {
success = true;
} catch (Throwable t) {
handleThrowable(target, t);
@@ -254,10 +232,10 @@ public class FuzzOne {
try {
Files.delete(target);
} catch (IOException e) {
- LOG.warn("couldn't delete: "+target.toAbsolutePath());
+ LOG.warn("couldn't delete: " + target.toAbsolutePath());
}
} else {
- LOG.info("FOUND PROBLEM: "+target);
+ LOG.info("FOUND PROBLEM: " + target);
}
}
return success ? 1 : 0;
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLI.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLI.java
index 2c29803..10453e6 100644
--- a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLI.java
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLI.java
@@ -16,10 +16,6 @@
*/
package org.apache.tika.fuzzing.cli;
-import org.apache.tika.utils.ProcessUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.FileVisitor;
@@ -37,19 +33,24 @@ import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.tika.utils.ProcessUtils;
+
public class FuzzingCLI {
private static final Logger LOG = LoggerFactory.getLogger(FuzzingCLI.class);
private static final Path POISON = Paths.get("");
- private int maxFiles = -1;
+ private final int maxFiles = -1;
- public static void main (String[] args) throws Exception {
+ public static void main(String[] args) throws Exception {
FuzzingCLIConfig config = FuzzingCLIConfig.parse(args);
if (config.getMaxTransformers() == 0) {
LOG.warn("max transformers == 0!");
}
- if (! Files.isDirectory(config.inputDir)) {
+ if (!Files.isDirectory(config.inputDir)) {
throw new IllegalArgumentException("input directory doesn't exist: " + config.inputDir);
}
FuzzingCLI fuzzingCLI = new FuzzingCLI();
@@ -59,15 +60,16 @@ public class FuzzingCLI {
private void execute(FuzzingCLIConfig config) {
ArrayBlockingQueue<Path> q = new ArrayBlockingQueue(10000);
- ExecutorService executorService = Executors.newFixedThreadPool(config.getNumThreads()+1);
- ExecutorCompletionService executorCompletionService = new ExecutorCompletionService(executorService);
+ ExecutorService executorService = Executors.newFixedThreadPool(config.getNumThreads() + 1);
+ ExecutorCompletionService executorCompletionService =
+ new ExecutorCompletionService(executorService);
FileAdder fileAdder = new FileAdder(config.getInputDirectory(), config.getNumThreads(), q);
executorCompletionService.submit(fileAdder);
for (int i = 0; i < config.numThreads; i++) {
executorCompletionService.submit(new Fuzzer(q, config));
}
int finished = 0;
- while (finished < config.getNumThreads()+1) {
+ while (finished < config.getNumThreads() + 1) {
Future<Integer> future = null;
try {
future = executorCompletionService.poll(1, TimeUnit.SECONDS);
@@ -88,6 +90,7 @@ public class FuzzingCLI {
private final int threadId = COUNTER.getAndIncrement();
private final ArrayBlockingQueue<Path> q;
private final FuzzingCLIConfig config;
+
public Fuzzer(ArrayBlockingQueue<Path> q, FuzzingCLIConfig config) {
this.q = q;
this.config = config;
@@ -98,14 +101,14 @@ public class FuzzingCLI {
while (true) {
Path p = q.take();
if (p.equals(POISON)) {
- LOG.debug("Thread "+threadId + " stopping");
+ LOG.debug("Thread " + threadId + " stopping");
return 1;
}
boolean success = false;
int tries = 0;
- while (! success && tries < config.getRetries()) {
+ while (!success && tries < config.getRetries()) {
if (tries > 0) {
- LOG.warn("Retrying ("+tries+") "+p);
+ LOG.warn("Retrying (" + tries + ") " + p);
}
success = fuzzIt(config, p, tries);
tries++;
@@ -116,33 +119,19 @@ public class FuzzingCLI {
private boolean fuzzIt(FuzzingCLIConfig config, Path p, int retryId) {
//the target files should be flattened so that
//problematic files are all in one directory...may rethink this option later
- Path target = config.getOutputDirectory().resolve(
- p.getFileName());
+ Path target = config.getOutputDirectory().resolve(p.getFileName());
String cp = System.getProperty("java.class.path");
- String[] args = new String[] {
- "java",
- "-XX:-OmitStackTraceInFastThrow",
- "-Xmx"+config.xmx,
- "-ea",
- "-cp",
- ProcessUtils.escapeCommandLine(cp),
- "org.apache.tika.fuzzing.cli.FuzzOne",
- "-i",
- ProcessUtils.escapeCommandLine(p.toAbsolutePath().toString()),
- "-o",
- ProcessUtils.escapeCommandLine(target.toAbsolutePath().toString()),
- "-p",
- Integer.toString(config.getPerFileIterations()),
- "-t",
- Integer.toString(config.getMaxTransformers()),
- "-n",
- Integer.toString(threadId),
- "-r",
- Integer.toString(retryId),
- "-m",
- Long.toString(config.getTimeoutMs())
- };
+ String[] args =
+ new String[]{"java", "-XX:-OmitStackTraceInFastThrow", "-Xmx" + config.xmx,
+ "-ea", "-cp", ProcessUtils.escapeCommandLine(cp),
+ "org.apache.tika.fuzzing.cli.FuzzOne", "-i",
+ ProcessUtils.escapeCommandLine(p.toAbsolutePath().toString()), "-o",
+ ProcessUtils.escapeCommandLine(target.toAbsolutePath().toString()),
+ "-p", Integer.toString(config.getPerFileIterations()), "-t",
+ Integer.toString(config.getMaxTransformers()), "-n",
+ Integer.toString(threadId), "-r", Integer.toString(retryId), "-m",
+ Long.toString(config.getTimeoutMs())};
ProcessBuilder pb = new ProcessBuilder(args);
pb.inheritIO();
Process process = null;
@@ -184,6 +173,7 @@ public class FuzzingCLI {
private final int numThreads;
private final ArrayBlockingQueue<Path> queue;
private int added = 0;
+
public FileAdder(Path inputDirectory, int numThreads, ArrayBlockingQueue<Path> queue) {
this.inputDir = inputDirectory;
this.numThreads = numThreads;
@@ -202,12 +192,14 @@ public class FuzzingCLI {
private class DirWalker implements FileVisitor<Path> {
@Override
- public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException {
+ public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs)
+ throws IOException {
return FileVisitResult.CONTINUE;
}
@Override
- public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
+ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
+ throws IOException {
if (maxFiles > -1 && added >= maxFiles) {
LOG.info("hit maxfiles; file crawler is stopping early");
return FileVisitResult.TERMINATE;
@@ -236,7 +228,8 @@ public class FuzzingCLI {
}
@Override
- public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
+ public FileVisitResult postVisitDirectory(Path dir, IOException exc)
+ throws IOException {
return FileVisitResult.CONTINUE;
}
}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLIConfig.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLIConfig.java
index 5563307..c741616 100644
--- a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLIConfig.java
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/cli/FuzzingCLIConfig.java
@@ -16,6 +16,9 @@
*/
package org.apache.tika.fuzzing.cli;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
@@ -23,9 +26,6 @@ import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-
public class FuzzingCLIConfig {
private static final int DEFAULT_NUM_THREADS = 4;
@@ -40,109 +40,72 @@ public class FuzzingCLIConfig {
private static final String DEFAULT_XMX = "512m";
static Options OPTIONS;
+
static {
//By the time this commandline is parsed, there should be both an extracts and an inputDir
Option extracts = new Option("extracts", true, "directory for extract files");
extracts.setRequired(true);
- OPTIONS = new Options()
- .addOption(Option.builder("i")
- .longOpt("inputDir")
- .desc("input directory for seed files")
- .hasArg(true)
- .required(true)
- .build())
- .addOption(Option.builder("o")
- .longOpt("outputDir")
- .desc("output directory for files that triggered problems")
- .hasArg(true)
- .required(true)
- .build())
- .addOption(Option.builder("n")
- .longOpt("numThreads")
- .desc("number of threads")
- .hasArg(true)
- .required(false)
- .build())
- .addOption(Option.builder("p")
- .longOpt("perFile")
- .desc("number of iterations to run per seed file")
- .hasArg(true)
- .required(false)
- .build())
- .addOption(Option.builder("t")
- .longOpt("maxTransformers")
- .desc("maximum number of transformers to run per iteration")
- .hasArg(true)
- .required(false)
- .build())
- .addOption(Option.builder("m")
- .longOpt("timeoutMs")
- .desc("timeout in ms -- max time allowed to parse a file")
- .hasArg(true)
- .required(false)
- .build())
- .addOption(Option.builder("x")
- .longOpt("xmx")
- .desc("e.g. 1G, max heap appended to -Xmx in the forked process")
- .hasArg(true)
- .required(false)
- .build())
- .addOption(Option.builder("r")
- .longOpt("retries")
- .desc("number of times to retry a seed file if there's a catastrophic failure")
- .hasArg(true)
- .required(false)
- .build());
+ OPTIONS = new Options().addOption(
+ Option.builder("i").longOpt("inputDir").desc("input directory for seed files")
+ .hasArg(true).required(true).build()).addOption(
+ Option.builder("o").longOpt("outputDir")
+ .desc("output directory for files that triggered problems").hasArg(true)
+ .required(true).build()).addOption(
+ Option.builder("n").longOpt("numThreads").desc("number of threads").hasArg(true)
+ .required(false).build()).addOption(Option.builder("p").longOpt("perFile")
+ .desc("number of iterations to run per seed file").hasArg(true).required(false)
+ .build()).addOption(Option.builder("t").longOpt("maxTransformers")
+ .desc("maximum number of transformers to run per iteration").hasArg(true)
+ .required(false).build()).addOption(Option.builder("m").longOpt("timeoutMs")
+ .desc("timeout in ms -- max time allowed to parse a file").hasArg(true)
+ .required(false).build()).addOption(Option.builder("x").longOpt("xmx")
+ .desc("e.g. 1G, max heap appended to -Xmx in the forked process").hasArg(true)
+ .required(false).build()).addOption(Option.builder("r").longOpt("retries")
+ .desc("number of times to retry a seed file if there's a catastrophic failure")
+ .hasArg(true).required(false).build());
}
- public static FuzzingCLIConfig parse(String[] args) throws ParseException {
- CommandLineParser parser = new DefaultParser();
- CommandLine commandLine = parser.parse(OPTIONS, args);
- FuzzingCLIConfig config = new FuzzingCLIConfig();
- config.inputDir = Paths.get(commandLine.getOptionValue("i"));
- config.outputDir = Paths.get(commandLine.getOptionValue("o"));
- config.numThreads = (commandLine.hasOption("n")) ?
- Integer.parseInt(commandLine.getOptionValue("n")) :
- DEFAULT_NUM_THREADS;
- config.perFileIterations = (commandLine.hasOption("p")) ?
- Integer.parseInt(commandLine.getOptionValue("p")) :
- DEFAULT_NUM_ITERATIONS;
- config.maxTransformers = (commandLine.hasOption("t")) ?
- Integer.parseInt(commandLine.getOptionValue("t")) :
- DEFAULT_MAX_TRANSFORMERS;
- config.timeoutMS = (commandLine.hasOption("m")) ?
- Integer.parseInt(commandLine.getOptionValue("m")) :
- DEFAULT_TIMEOUT_MS;
- config.retries = (commandLine.hasOption("r")) ?
- Integer.parseInt(commandLine.getOptionValue("r")) :
- DEFAULT_RETRIES;
- config.xmx = (commandLine.hasOption("x")) ?
- commandLine.getOptionValue("x") :
- DEFAULT_XMX;
- return config;
- }
-
-
int numThreads;
//number of variants tried per file
int perFileIterations;
//maxTransformers per file
int maxTransformers;
-
//max time allowed to process each file in milliseconds
long timeoutMS;
-
//times to retry a seed file after a catastrophic failure
int retries;
-
//xmx for forked process, e.g. 512m or 1G
String xmx;
Path inputDir;
Path outputDir;
+ public static FuzzingCLIConfig parse(String[] args) throws ParseException {
+ CommandLineParser parser = new DefaultParser();
+ CommandLine commandLine = parser.parse(OPTIONS, args);
+ FuzzingCLIConfig config = new FuzzingCLIConfig();
+ config.inputDir = Paths.get(commandLine.getOptionValue("i"));
+ config.outputDir = Paths.get(commandLine.getOptionValue("o"));
+ config.numThreads =
+ (commandLine.hasOption("n")) ? Integer.parseInt(commandLine.getOptionValue("n")) :
+ DEFAULT_NUM_THREADS;
+ config.perFileIterations =
+ (commandLine.hasOption("p")) ? Integer.parseInt(commandLine.getOptionValue("p")) :
+ DEFAULT_NUM_ITERATIONS;
+ config.maxTransformers =
+ (commandLine.hasOption("t")) ? Integer.parseInt(commandLine.getOptionValue("t")) :
+ DEFAULT_MAX_TRANSFORMERS;
+ config.timeoutMS =
+ (commandLine.hasOption("m")) ? Integer.parseInt(commandLine.getOptionValue("m")) :
+ DEFAULT_TIMEOUT_MS;
+ config.retries =
+ (commandLine.hasOption("r")) ? Integer.parseInt(commandLine.getOptionValue("r")) :
+ DEFAULT_RETRIES;
+ config.xmx = (commandLine.hasOption("x")) ? commandLine.getOptionValue("x") : DEFAULT_XMX;
+ return config;
+ }
public int getNumThreads() {
return numThreads;
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteDeleter.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteDeleter.java
index 9d3efa0..43ba46b 100644
--- a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteDeleter.java
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteDeleter.java
@@ -16,9 +16,6 @@
*/
package org.apache.tika.fuzzing.general;
-import org.apache.tika.fuzzing.Transformer;
-import org.apache.tika.mime.MediaType;
-
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
@@ -26,12 +23,14 @@ import java.util.Collections;
import java.util.Random;
import java.util.Set;
+import org.apache.tika.fuzzing.Transformer;
+import org.apache.tika.mime.MediaType;
+
public class ByteDeleter implements Transformer {
+ static Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.OCTET_STREAM);
Random random = new Random();
float percentDeleted = 0.01f;
- static Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.OCTET_STREAM);
-
@Override
public Set<MediaType> getSupportedTypes() {
return SUPPORTED_TYPES;
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteFlipper.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteFlipper.java
index f4a906d..b830c7a 100644
--- a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteFlipper.java
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteFlipper.java
@@ -16,10 +16,6 @@
*/
package org.apache.tika.fuzzing.general;
-import org.apache.commons.io.IOUtils;
-import org.apache.tika.fuzzing.Transformer;
-import org.apache.tika.mime.MediaType;
-
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
@@ -27,14 +23,18 @@ import java.util.Collections;
import java.util.Random;
import java.util.Set;
+import org.apache.commons.io.IOUtils;
+
+import org.apache.tika.fuzzing.Transformer;
+import org.apache.tika.mime.MediaType;
+
public class ByteFlipper implements Transformer {
+ static Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.OCTET_STREAM);
//TODO add something about protecting first x bytes?
- private Random random = new Random();
+ private final Random random = new Random();
private float percentCorrupt = 0.01f;
- static Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.OCTET_STREAM);
-
@Override
public Set<MediaType> getSupportedTypes() {
return SUPPORTED_TYPES;
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteInjector.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteInjector.java
index 77ed09c..b6a5cd0 100644
--- a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteInjector.java
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/ByteInjector.java
@@ -16,10 +16,6 @@
*/
package org.apache.tika.fuzzing.general;
-import org.apache.commons.io.IOUtils;
-import org.apache.tika.fuzzing.Transformer;
-import org.apache.tika.mime.MediaType;
-
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
@@ -28,11 +24,16 @@ import java.util.Collections;
import java.util.Random;
import java.util.Set;
+import org.apache.commons.io.IOUtils;
+
+import org.apache.tika.fuzzing.Transformer;
+import org.apache.tika.mime.MediaType;
+
public class ByteInjector implements Transformer {
+ static Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.OCTET_STREAM);
Random random = new Random();
float injectionFrequency = 0.01f;
int maxSpan = 100;
- static Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.OCTET_STREAM);
@Override
public Set<MediaType> getSupportedTypes() {
@@ -43,7 +44,7 @@ public class ByteInjector implements Transformer {
public void transform(InputStream is, OutputStream os) throws IOException {
//TODO -- don't load the full thing into memory
byte[] input = IOUtils.toByteArray(is);
- int numInjections = (int) Math.floor((double)injectionFrequency*(double)input.length);
+ int numInjections = (int) Math.floor((double) injectionFrequency * (double) input.length);
//at least one injection
numInjections = numInjections == 0 ? 1 : numInjections;
int[] starts = new int[numInjections];
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/GeneralTransformer.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/GeneralTransformer.java
index b8b0bbc..62f3fc9 100644
--- a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/GeneralTransformer.java
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/GeneralTransformer.java
@@ -16,13 +16,6 @@
*/
package org.apache.tika.fuzzing.general;
-import org.apache.commons.io.IOUtils;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.fuzzing.Transformer;
-import org.apache.tika.mime.MediaType;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
@@ -33,25 +26,32 @@ import java.util.HashSet;
import java.util.Random;
import java.util.Set;
-public class GeneralTransformer implements Transformer {
+import org.apache.commons.io.IOUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
- private static final Logger LOG = LoggerFactory.getLogger(GeneralTransformer.class);
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.fuzzing.Transformer;
+import org.apache.tika.mime.MediaType;
- Random random = new Random();
+public class GeneralTransformer implements Transformer {
+ private static final Logger LOG = LoggerFactory.getLogger(GeneralTransformer.class);
private final int maxTransforms;
private final Transformer[] transformers;
private final Set<MediaType> supportedTypes;
+ Random random = new Random();
+
public GeneralTransformer() {
- this(new ByteDeleter(), new ByteFlipper(),
- new ByteInjector(), new Truncator(), new SpanSwapper());
+ this(new ByteDeleter(), new ByteFlipper(), new ByteInjector(), new Truncator(),
+ new SpanSwapper());
}
- public GeneralTransformer(Transformer ... transformers) {
+ public GeneralTransformer(Transformer... transformers) {
this(transformers.length, transformers);
}
- public GeneralTransformer(int maxTransforms, Transformer ... transformers) {
+ public GeneralTransformer(int maxTransforms, Transformer... transformers) {
this.maxTransforms = (maxTransforms < 0) ? transformers.length : maxTransforms;
this.transformers = transformers;
Set<MediaType> tmpTypes = new HashSet<>();
@@ -83,8 +83,7 @@ public class GeneralTransformer implements Transformer {
for (int transformerIndex : transformerIndices) {
byte[] bytes = bos.toByteArray();
bos = new ByteArrayOutputStream();
- transformers[transformerIndex].transform(
- new ByteArrayInputStream(bytes), bos);
+ transformers[transformerIndex].transform(new ByteArrayInputStream(bytes), bos);
bos.flush();
if (bos.toByteArray().length == 0) {
LOG.warn("zero length: " + transformers[transformerIndex]);
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/SpanSwapper.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/SpanSwapper.java
index 9dc7ac8..a15a750 100644
--- a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/SpanSwapper.java
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/SpanSwapper.java
@@ -16,10 +16,6 @@
*/
package org.apache.tika.fuzzing.general;
-import org.apache.commons.io.IOUtils;
-import org.apache.tika.fuzzing.Transformer;
-import org.apache.tika.mime.MediaType;
-
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
@@ -27,17 +23,20 @@ import java.util.Collections;
import java.util.Random;
import java.util.Set;
+import org.apache.commons.io.IOUtils;
+
+import org.apache.tika.fuzzing.Transformer;
+import org.apache.tika.mime.MediaType;
+
/**
* randomly swaps spans from the input
- *
*/
public class SpanSwapper implements Transformer {
+ static Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.OCTET_STREAM);
Random random = new Random();
- private float swapProbability = 0.01f;
int maxSpanLength = 10000;
-
- static Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.OCTET_STREAM);
+ private final float swapProbability = 0.01f;
@Override
public Set<MediaType> getSupportedTypes() {
@@ -47,7 +46,7 @@ public class SpanSwapper implements Transformer {
@Override
public void transform(InputStream is, OutputStream os) throws IOException {
byte[] input = IOUtils.toByteArray(is);
- int numSwaps = (int) Math.floor(swapProbability*input.length);
+ int numSwaps = (int) Math.floor(swapProbability * input.length);
//at least one swap
numSwaps = numSwaps == 0 ? 1 : numSwaps;
byte[] ret = new byte[input.length];
@@ -68,8 +67,7 @@ public class SpanSwapper implements Transformer {
int len = random.nextInt(maxSpanLength);
int maxStart = Math.max(srcStart, targStart);
- len = (len+maxStart < ret.length) ? len :
- ret.length-maxStart;
+ len = (len + maxStart < ret.length) ? len : ret.length - maxStart;
byte[] landingBytes = new byte[len];
//copy the landing zone
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/Truncator.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/Truncator.java
index caa1dd0..bf55836 100644
--- a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/Truncator.java
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/general/Truncator.java
@@ -16,10 +16,6 @@
*/
package org.apache.tika.fuzzing.general;
-import org.apache.commons.io.IOUtils;
-import org.apache.tika.fuzzing.Transformer;
-import org.apache.tika.mime.MediaType;
-
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
@@ -27,10 +23,15 @@ import java.util.Collections;
import java.util.Random;
import java.util.Set;
+import org.apache.commons.io.IOUtils;
+
+import org.apache.tika.fuzzing.Transformer;
+import org.apache.tika.mime.MediaType;
+
public class Truncator implements Transformer {
- Random random = new Random();
static Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.OCTET_STREAM);
+ Random random = new Random();
@Override
public Set<MediaType> getSupportedTypes() {
@@ -47,7 +48,7 @@ public class Truncator implements Transformer {
int len = 1 + random.nextInt(input.length);
//at least one
if (len >= input.length) {
- len = input.length-2;
+ len = input.length - 2;
if (len < 0) {
len = 0;
}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/EvilCOSWriter.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/EvilCOSWriter.java
index 58292a0..4575690 100644
--- a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/EvilCOSWriter.java
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/EvilCOSWriter.java
@@ -16,6 +16,36 @@
*/
package org.apache.tika.fuzzing.pdf;
+import java.io.BufferedOutputStream;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.SequenceInputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.text.DecimalFormat;
+import java.text.DecimalFormatSymbols;
+import java.text.NumberFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Deque;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSBoolean;
@@ -48,48 +78,17 @@ import org.apache.pdfbox.pdmodel.fdf.FDFDocument;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.COSFilterInputStream;
import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface;
import org.apache.pdfbox.util.Hex;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
import org.apache.tika.exception.TikaException;
import org.apache.tika.fuzzing.Transformer;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.BufferedOutputStream;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.Closeable;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.SequenceInputStream;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
-import java.text.DecimalFormat;
-import java.text.DecimalFormatSymbols;
-import java.text.NumberFormat;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Deque;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Hashtable;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Random;
-import java.util.Set;
public class EvilCOSWriter implements ICOSVisitor, Closeable {
- private static final Logger LOG = LoggerFactory.getLogger(EvilCOSWriter.class);
-
/**
* The dictionary open token.
*/
@@ -106,7 +105,6 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
* The start to a PDF comment.
*/
public static final byte[] COMMENT = {'%'};
-
/**
* The output version of the PDF.
*/
@@ -114,17 +112,17 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
/**
* Garbage bytes used to create the PDF header.
*/
- public static final byte[] GARBAGE = new byte[]{(byte) 0xf6, (byte) 0xe4, (byte) 0xfc, (byte) 0xdf};
+ public static final byte[] GARBAGE =
+ new byte[]{(byte) 0xf6, (byte) 0xe4, (byte) 0xfc, (byte) 0xdf};
/**
* The EOF constant.
*/
public static final byte[] EOF = "%%EOF".getBytes(StandardCharsets.US_ASCII);
- // pdf tokens
-
/**
* The reference token.
*/
public static final byte[] REFERENCE = "R".getBytes(StandardCharsets.US_ASCII);
+ // pdf tokens
/**
* The XREF token.
*/
@@ -169,47 +167,27 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
* The close stream token.
*/
public static final byte[] ENDSTREAM = "endstream".getBytes(StandardCharsets.US_ASCII);
-
- private final NumberFormat formatXrefOffset = new DecimalFormat("0000000000",
- DecimalFormatSymbols.getInstance(Locale.US));
+ private static final Logger LOG = LoggerFactory.getLogger(EvilCOSWriter.class);
+ private final NumberFormat formatXrefOffset =
+ new DecimalFormat("0000000000", DecimalFormatSymbols.getInstance(Locale.US));
// the decimal format for the xref object generation number data
- private final NumberFormat formatXrefGeneration = new DecimalFormat("00000",
- DecimalFormatSymbols.getInstance(Locale.US));
-
- // the stream where we create the pdf output
- private OutputStream output;
-
- // the stream used to write standard cos data
- private COSStandardOutputStream standardOutput;
-
- // the start position of the x ref section
- private long startxref = 0;
-
- // the current object number
- private long number = 0;
-
- private int roughNumberOfObjects = 0;
-
+ private final NumberFormat formatXrefGeneration =
+ new DecimalFormat("00000", DecimalFormatSymbols.getInstance(Locale.US));
// maps the object to the keys generated in the writer
// these are used for indirect references in other objects
//A hashtable is used on purpose over a hashmap
//so that null entries will not get added.
@SuppressWarnings({"squid:S1149"})
private final Map<COSBase, COSObjectKey> objectKeys = new Hashtable<>();
-
private final Map<COSObjectKey, COSBase> keyObject = new HashMap<>();
-
// the list of x ref entries to be made so far
private final List<COSWriterXRefEntry> xRefEntries = new ArrayList<>();
private final Set<COSBase> objectsToWriteSet = new HashSet<>();
-
//A list of objects to write.
private final Deque<COSBase> objectsToWrite = new LinkedList<>();
-
//a list of objects already written
private final Set<COSBase> writtenObjects = new HashSet<>();
-
//An 'actual' is any COSBase that is not a COSObject.
//need to keep a list of the actuals that are added
//as well as the objects because there is a problem
@@ -217,14 +195,23 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
//the actual for that object, so we will track
//actuals separately.
private final Set<COSBase> actualsAdded = new HashSet<>();
-
+ private final PDFTransformerConfig config;
+ private final Random random = new Random();
+ // the stream where we create the pdf output
+ private OutputStream output;
+ // the stream used to write standard cos data
+ private COSStandardOutputStream standardOutput;
+ // the start position of the x ref section
+ private long startxref = 0;
+ // the current object number
+ private long number = 0;
+ private int roughNumberOfObjects = 0;
private COSObjectKey currentObjectKey = null;
private PDDocument pdDocument = null;
private FDFDocument fdfDocument = null;
private boolean willEncrypt = false;
-
// signing
- private boolean incrementalUpdate = false;
+ private final boolean incrementalUpdate = false;
private boolean reachedSignature = false;
private long signatureOffset;
private long signatureLength;
@@ -235,11 +222,8 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
private SignatureInterface signatureInterface;
private byte[] incrementPart;
private COSArray byteRangeArray;
+ private final FilterFactory filterFactory = FilterFactory.INSTANCE;
- private FilterFactory filterFactory = FilterFactory.INSTANCE;
-
- private final PDFTransformerConfig config;
- private final Random random = new Random();
/**
* COSWriter constructor.
*
@@ -252,7 +236,79 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
this.config = config;
}
- private void prepareIncrement(PDDocument doc) throws IOException {
+ /**
+ * This will output the given byte string as a PDF object.
+ *
+ * @param string COSString to be written
+ * @param output The stream to write to.
+ * @throws IOException If there is an error writing to the stream.
+ */
+ public static void writeString(COSString string, OutputStream output) throws IOException {
+ writeString(string.getBytes(), string.getForceHexForm(), output);
+ }
+
+ /**
+ * This will output the given text/byte string as a PDF object.
+ *
+ * @param bytes byte array representation of a string to be written
+ * @param output The stream to write to.
+ * @throws IOException If there is an error writing to the stream.
+ */
+ public static void writeString(byte[] bytes, OutputStream output) throws IOException {
+ writeString(bytes, false, output);
+ }
+
+ /**
+ * This will output the given text/byte string as a PDF object.
+ *
+ * @param output The stream to write to.
+ * @throws IOException If there is an error writing to the stream.
+ */
+ private static void writeString(byte[] bytes, boolean forceHex, OutputStream output)
+ throws IOException {
+ // check for non-ASCII characters
+ boolean isASCII = true;
+ if (!forceHex) {
+ for (byte b : bytes) {
+ // if the byte is negative then it is an eight bit byte and is outside the ASCII range
+ if (b < 0) {
+ isASCII = false;
+ break;
+ }
+ // PDFBOX-3107 EOL markers within a string are troublesome
+ if (b == 0x0d || b == 0x0a) {
+ isASCII = false;
+ break;
+ }
+ }
+ }
+
+ if (isASCII && !forceHex) {
+ // write ASCII string
+ output.write('(');
+ for (byte b : bytes) {
+ switch (b) {
+ case '(':
+ case ')':
+ case '\\':
+ output.write('\\');
+ output.write(b);
+ break;
+ default:
+ output.write(b);
+ break;
+ }
+ }
+ output.write(')');
+ } else {
+ // write hex string
+ output.write('<');
+ Hex.writeHexBytes(bytes, output);
+ output.write('>');
+ }
+ }
+
+ private void prepareIncrement(PDDocument doc) throws IOException {
if (doc != null) {
COSDocument cosDoc = doc.getDocument();
@@ -311,6 +367,16 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
}
/**
+ * This will set the current object number.
+ *
+ * @param newNumber The new object number.
+ */
+ protected void setNumber(long newNumber) {
+ number = newNumber;
+
+ }
+
+ /**
* This will get all available object keys.
*
* @return A map of all object keys.
@@ -329,67 +395,57 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
}
/**
- * This will get the standard output stream.
- *
- * @return The standard output stream.
- */
- protected COSStandardOutputStream getStandardOutput() {
- return standardOutput;
- }
-
- /**
- * This will get the current start xref.
+ * This will set the output stream.
*
- * @return The current start xref.
+ * @param newOutput The new output stream.
*/
- protected long getStartxref() {
- return startxref;
+ private void setOutput(OutputStream newOutput) {
+ output = newOutput;
}
/**
- * This will get the xref entries.
+ * This will get the standard output stream.
*
- * @return All available xref entries.
+ * @return The standard output stream.
*/
- protected List<COSWriterXRefEntry> getXRefEntries() {
- return xRefEntries;
+ protected COSStandardOutputStream getStandardOutput() {
+ return standardOutput;
}
/**
- * This will set the current object number.
+ * This will set the standard output stream.
*
- * @param newNumber The new object number.
+ * @param newStandardOutput The new standard output stream.
*/
- protected void setNumber(long newNumber) {
- number = newNumber;
-
+ private void setStandardOutput(COSStandardOutputStream newStandardOutput) {
+ standardOutput = newStandardOutput;
}
/**
- * This will set the output stream.
+ * This will get the current start xref.
*
- * @param newOutput The new output stream.
+ * @return The current start xref.
*/
- private void setOutput(OutputStream newOutput) {
- output = newOutput;
+ protected long getStartxref() {
+ return startxref;
}
/**
- * This will set the standard output stream.
+ * This will set the start xref.
*
- * @param newStandardOutput The new standard output stream.
+ * @param newStartxref The new start xref attribute.
*/
- private void setStandardOutput(COSStandardOutputStream newStandardOutput) {
- standardOutput = newStandardOutput;
+ protected void setStartxref(long newStartxref) {
+ startxref = newStartxref;
}
/**
- * This will set the start xref.
+ * This will get the xref entries.
*
- * @param newStartxref The new start xref attribute.
+ * @return All available xref entries.
*/
- protected void setStartxref(long newStartxref) {
- startxref = newStartxref;
+ protected List<COSWriterXRefEntry> getXRefEntries() {
+ return xRefEntries;
}
/**
@@ -434,8 +490,7 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
actual = ((COSObject) actual).getObject();
}
- if (!writtenObjects.contains(object) &&
- !objectsToWriteSet.contains(object) &&
+ if (!writtenObjects.contains(object) && !objectsToWriteSet.contains(object) &&
!actualsAdded.contains(actual)) {
COSBase cosBase = null;
COSObjectKey cosObjectKey = null;
@@ -445,9 +500,11 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
if (cosObjectKey != null) {
cosBase = keyObject.get(cosObjectKey);
}
- if (actual != null && objectKeys.containsKey(actual)
- && object instanceof COSUpdateInfo && !((COSUpdateInfo) object).isNeedToBeUpdated()
- && cosBase instanceof COSUpdateInfo && !((COSUpdateInfo) cosBase).isNeedToBeUpdated()) {
+ if (actual != null && objectKeys.containsKey(actual) &&
+ object instanceof COSUpdateInfo &&
+ !((COSUpdateInfo) object).isNeedToBeUpdated() &&
+ cosBase instanceof COSUpdateInfo &&
+ !((COSUpdateInfo) cosBase).isNeedToBeUpdated()) {
return;
}
objectsToWrite.add(object);
@@ -473,13 +530,15 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
// write the object
long objectNumber = currentObjectKey.getNumber();
- if (config.getRandomizeObjectNumbers() > 0.0f && random.nextFloat() <
- config.getRandomizeObjectNumbers()) {
- objectNumber = random.nextInt(((int)objectNumber)*2);
+ if (config.getRandomizeObjectNumbers() > 0.0f &&
+ random.nextFloat() < config.getRandomizeObjectNumbers()) {
+ objectNumber = random.nextInt(((int) objectNumber) * 2);
}
- getStandardOutput().write(String.valueOf(objectNumber).getBytes(StandardCharsets.ISO_8859_1));
+ getStandardOutput().write(
+ String.valueOf(objectNumber).getBytes(StandardCharsets.ISO_8859_1));
getStandardOutput().write(SPACE);
- getStandardOutput().write(String.valueOf(currentObjectKey.getGeneration()).getBytes(StandardCharsets.ISO_8859_1));
+ getStandardOutput().write(String.valueOf(currentObjectKey.getGeneration())
+ .getBytes(StandardCharsets.ISO_8859_1));
getStandardOutput().write(SPACE);
getStandardOutput().write(OBJ);
getStandardOutput().writeEOL();
@@ -496,15 +555,16 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
}
private void writeObjContents(COSBase obj) throws IOException {
- if (! (obj instanceof COSObject)) {
+ if (!(obj instanceof COSObject)) {
obj.accept(this);
return;
}
- COSObject cosObject = (COSObject)obj;
+ COSObject cosObject = (COSObject) obj;
COSBase underlyingObject = cosObject.getObject();
- if (underlyingObject instanceof COSStream && config.getUnfilteredStreamTransformer() != null) {
- COSStream cosStream = (COSStream)underlyingObject;
+ if (underlyingObject instanceof COSStream &&
+ config.getUnfilteredStreamTransformer() != null) {
+ COSStream cosStream = (COSStream) underlyingObject;
Transformer unfilteredStreamTransformer = config.getUnfilteredStreamTransformer();
ByteArrayOutputStream bos = new ByteArrayOutputStream();
try (InputStream is = cosStream.createRawInputStream()) {
@@ -512,7 +572,8 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
}
ByteArrayOutputStream transformed = new ByteArrayOutputStream();
try {
- unfilteredStreamTransformer.transform(new ByteArrayInputStream(bos.toByteArray()), transformed);
+ unfilteredStreamTransformer.transform(new ByteArrayInputStream(bos.toByteArray()),
+ transformed);
} catch (TikaException e) {
throw new IOException(e);
}
@@ -530,7 +591,7 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
//stub
if (obj instanceof COSStream) {
- COSStream stream = (COSStream)obj;
+ COSStream stream = (COSStream) obj;
//get the raw unfiltered bytes
byte[] bytes = new PDStream(stream).toByteArray();
//transform the underlying stream _before_ filters are applied
@@ -568,31 +629,32 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
stream.setItem(COSName.FILTER, actualFilters);
}
} else if (obj instanceof COSObject) {
- COSBase underlyingObject = ((COSObject)obj).getObject();
- mutate(underlyingObject);
+ COSBase underlyingObject = ((COSObject) obj).getObject();
+ mutate(underlyingObject);
}
}
- private TikaInputStream runFilters(COSBase filters, TikaInputStream is, List<COSName> usedFilters) throws IOException {
+ private TikaInputStream runFilters(COSBase filters, TikaInputStream is,
+ List<COSName> usedFilters) throws IOException {
if (filters instanceof COSNull) {
} else if (filters instanceof COSName) {
- is = runFilter((COSName)filters, is, new COSDictionary(), 0);
- usedFilters.add((COSName)filters);
- LOG.debug("filter:" + filters.toString() +" "+0 + " : " + is.getLength() );
+ is = runFilter((COSName) filters, is, new COSDictionary(), 0);
+ usedFilters.add((COSName) filters);
+ LOG.debug("filter:" + filters + " " + 0 + " : " + is.getLength());
} else if (filters instanceof COSArray) {
- COSArray filterArray = (COSArray)filters;
+ COSArray filterArray = (COSArray) filters;
//need to apply them in reverse order!
boolean transformed = false;
- for (int i = filterArray.size()-1; i >= 0; i--) {
- COSName filter = (COSName)filterArray.get(i);
+ for (int i = filterArray.size() - 1; i >= 0; i--) {
+ COSName filter = (COSName) filterArray.get(i);
is = runFilter(filter, is, new COSDictionary(), 0);
if (random.nextFloat() > 0.1 && transformed == false) {
is = transformRawStream(is);
transformed = true;
}
usedFilters.add(filter);
- LOG.debug("filter:" + filter.toString() +" "+i + " : " + is.getLength());
+ LOG.debug("filter:" + filter.toString() + " " + i + " : " + is.getLength());
if (is.getLength() > config.getMaxFilteredStreamLength()) {
LOG.debug("stopping early");
return is;
@@ -600,7 +662,8 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
}
return is;
} else {
- throw new IllegalArgumentException("Can't handle this class here: "+filters.getClass());
+ throw new IllegalArgumentException(
+ "Can't handle this class here: " + filters.getClass());
}
return transformRawStream(is);
}
@@ -631,8 +694,9 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
return is;
}
- private TikaInputStream runFilter(COSName filterCOSName, TikaInputStream tis, COSDictionary filterParameters,
- int filterIndex) throws IOException {
+ private TikaInputStream runFilter(COSName filterCOSName, TikaInputStream tis,
+ COSDictionary filterParameters, int filterIndex)
+ throws IOException {
Filter filter = filterFactory.getFilter(filterCOSName);
if (tis.getLength() < 100000000) {
@@ -680,9 +744,9 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
protected void doWriteHeader(COSDocument doc) throws IOException {
String headerString;
if (fdfDocument != null) {
- headerString = "%FDF-" + Float.toString(doc.getVersion());
+ headerString = "%FDF-" + doc.getVersion();
} else {
- headerString = "%PDF-" + Float.toString(doc.getVersion());
+ headerString = "%PDF-" + doc.getVersion();
}
getStandardOutput().write(headerString.getBytes(StandardCharsets.ISO_8859_1));
@@ -692,7 +756,6 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
getStandardOutput().writeEOL();
}
-
/**
* This will write the trailer to the PDF document.
*
@@ -818,7 +881,8 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
long inLength = incrementalInput.length();
long beforeLength = signatureOffset;
long afterOffset = signatureOffset + signatureLength;
- long afterLength = getStandardOutput().getPos() - (inLength + signatureLength) - (signatureOffset - inLength);
+ long afterLength = getStandardOutput().getPos() - (inLength + signatureLength) -
+ (signatureOffset - inLength);
String byteRange = "0 " + beforeLength + " " + afterOffset + " " + afterLength + "]";
@@ -880,14 +944,9 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
// range of incremental bytes to be signed (includes /ByteRange but not /Contents)
int incPartSigOffset = (int) (signatureOffset - incrementalInput.length());
int afterSigOffset = incPartSigOffset + (int) signatureLength;
- int[] range =
- {
- 0, incPartSigOffset,
- afterSigOffset, incrementPart.length - afterSigOffset
- };
-
- return new SequenceInputStream(
- new RandomAccessInputStream(incrementalInput),
+ int[] range = {0, incPartSigOffset, afterSigOffset, incrementPart.length - afterSigOffset};
+
+ return new SequenceInputStream(new RandomAccessInputStream(incrementalInput),
new COSFilterInputStream(incrementPart, range));
}
@@ -912,7 +971,8 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
// overwrite the signature Contents in the buffer
int incPartSigOffset = (int) (signatureOffset - incrementalInput.length());
- System.arraycopy(signatureBytes, 0, incrementPart, incPartSigOffset + 1, signatureBytes.length);
+ System.arraycopy(signatureBytes, 0, incrementPart, incPartSigOffset + 1,
+ signatureBytes.length);
// write the data to the incremental output stream
IOUtils.copy(new RandomAccessInputStream(incrementalInput), incrementalOutput);
@@ -1028,7 +1088,8 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
}
} else if (current instanceof COSObject) {
COSBase subValue = ((COSObject) current).getObject();
- if (willEncrypt || incrementalUpdate || subValue instanceof COSDictionary || subValue == null) {
+ if (willEncrypt || incrementalUpdate || subValue instanceof COSDictionary ||
+ subValue == null) {
// PDFBOX-4308: added willEncrypt to prevent an object
// that is referenced several times from being written
// direct and indirect, thus getting encrypted
@@ -1104,7 +1165,8 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
}
} else if (value instanceof COSObject) {
COSBase subValue = ((COSObject) value).getObject();
- if (willEncrypt || incrementalUpdate || subValue instanceof COSDictionary || subValue == null) {
+ if (willEncrypt || incrementalUpdate || subValue instanceof COSDictionary ||
+ subValue == null) {
// PDFBOX-4308: added willEncrypt to prevent an object
// that is referenced several times from being written
// direct and indirect, thus getting encrypted
@@ -1177,7 +1239,8 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
// write endof
getStandardOutput().write(STARTXREF);
getStandardOutput().writeEOL();
- getStandardOutput().write(String.valueOf(getStartxref()).getBytes(StandardCharsets.ISO_8859_1));
+ getStandardOutput().write(
+ String.valueOf(getStartxref()).getBytes(StandardCharsets.ISO_8859_1));
getStandardOutput().writeEOL();
getStandardOutput().write(EOF);
getStandardOutput().writeEOL();
@@ -1227,17 +1290,18 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
COSObjectKey key = getObjectKey(obj);
float randomThreshold = config.getRandomizeRefNumbers();
float r = random.nextFloat();
- if (randomThreshold > 0.0f &&
- r < randomThreshold) {
+ if (randomThreshold > 0.0f && r < randomThreshold) {
long num = random.nextInt(roughNumberOfObjects);
- LOG.debug("corrupting ref number: "+key.getNumber() + " -> "+num);
+ LOG.debug("corrupting ref number: " + key.getNumber() + " -> " + num);
getStandardOutput().write(String.valueOf(num).getBytes(StandardCharsets.ISO_8859_1));
} else {
- getStandardOutput().write(String.valueOf(key.getNumber()).getBytes(StandardCharsets.ISO_8859_1));
+ getStandardOutput().write(
+ String.valueOf(key.getNumber()).getBytes(StandardCharsets.ISO_8859_1));
}
getStandardOutput().write(SPACE);
- getStandardOutput().write(String.valueOf(key.getGeneration()).getBytes(StandardCharsets.ISO_8859_1));
+ getStandardOutput().write(
+ String.valueOf(key.getGeneration()).getBytes(StandardCharsets.ISO_8859_1));
getStandardOutput().write(SPACE);
getStandardOutput().write(REFERENCE);
}
@@ -1246,7 +1310,8 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
public Object visitFromStream(COSStream obj) throws IOException {
if (willEncrypt) {
pdDocument.getEncryption().getSecurityHandler()
- .encryptStream(obj, currentObjectKey.getNumber(), currentObjectKey.getGeneration());
+ .encryptStream(obj, currentObjectKey.getNumber(),
+ currentObjectKey.getGeneration());
}
InputStream input = null;
@@ -1270,13 +1335,13 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
}
}
+
@Override
public Object visitFromString(COSString obj) throws IOException {
if (willEncrypt) {
- pdDocument.getEncryption().getSecurityHandler().encryptString(
- obj,
- currentObjectKey.getNumber(),
- currentObjectKey.getGeneration());
+ pdDocument.getEncryption().getSecurityHandler()
+ .encryptString(obj, currentObjectKey.getNumber(),
+ currentObjectKey.getGeneration());
}
COSWriter.writeString(obj, getStandardOutput());
return null;
@@ -1285,7 +1350,6 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
/**
* This will write the pdf document.
*
- *
* @param doc The document to write.
* @throws IOException If an error occurs while generating the data.
*/
@@ -1317,8 +1381,8 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
* policy.
*/
public void write(PDDocument doc, SignatureInterface signInterface) throws IOException {
- long idTime = doc.getDocumentId() == null ? System.currentTimeMillis() :
- doc.getDocumentId();
+ long idTime =
+ doc.getDocumentId() == null ? System.currentTimeMillis() : doc.getDocumentId();
pdDocument = doc;
signatureInterface = signInterface;
@@ -1338,10 +1402,12 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
} else {
if (pdDocument.getEncryption() != null) {
if (!incrementalUpdate) {
- SecurityHandler securityHandler = pdDocument.getEncryption().getSecurityHandler();
+ SecurityHandler securityHandler =
+ pdDocument.getEncryption().getSecurityHandler();
if (!securityHandler.hasProtectionPolicy()) {
- throw new IllegalStateException("PDF contains an encryption dictionary, please remove it with "
- + "setAllSecurityToBeRemoved() or set a protection policy with protect()");
+ throw new IllegalStateException(
+ "PDF contains an encryption dictionary, please remove it with " +
+ "setAllSecurityToBeRemoved() or set a protection policy with protect()");
}
securityHandler.prepareDocumentForEncryption(pdDocument);
}
@@ -1384,7 +1450,8 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
}
}
// reuse origin documentID if available as first value
- COSString firstID = missingID ? new COSString(md5.digest()) : (COSString) idArray.get(0);
+ COSString firstID =
+ missingID ? new COSString(md5.digest()) : (COSString) idArray.get(0);
// it's ok to use the same ID for the second part if the ID is created for the first time
COSString secondID = missingID ? firstID : new COSString(md5.digest());
idArray = new COSArray();
@@ -1407,76 +1474,4 @@ public class EvilCOSWriter implements ICOSVisitor, Closeable {
COSDocument cosDoc = fdfDocument.getDocument();
cosDoc.accept(this);
}
-
- /**
- * This will output the given byte getString as a PDF object.
- *
- * @param string COSString to be written
- * @param output The stream to write to.
- * @throws IOException If there is an error writing to the stream.
- */
- public static void writeString(COSString string, OutputStream output) throws IOException {
- writeString(string.getBytes(), string.getForceHexForm(), output);
- }
-
- /**
- * This will output the given text/byte getString as a PDF object.
- *
- * @param bytes byte array representation of a string to be written
- * @param output The stream to write to.
- * @throws IOException If there is an error writing to the stream.
- */
- public static void writeString(byte[] bytes, OutputStream output) throws IOException {
- writeString(bytes, false, output);
- }
-
- /**
- * This will output the given text/byte string as a PDF object.
- *
- * @param output The stream to write to.
- * @throws IOException If there is an error writing to the stream.
- */
- private static void writeString(byte[] bytes, boolean forceHex, OutputStream output)
- throws IOException {
- // check for non-ASCII characters
- boolean isASCII = true;
- if (!forceHex) {
- for (byte b : bytes) {
- // if the byte is negative then it is an eight bit byte and is outside the ASCII range
- if (b < 0) {
- isASCII = false;
- break;
- }
- // PDFBOX-3107 EOL markers within a string are troublesome
- if (b == 0x0d || b == 0x0a) {
- isASCII = false;
- break;
- }
- }
- }
-
- if (isASCII && !forceHex) {
- // write ASCII string
- output.write('(');
- for (byte b : bytes) {
- switch (b) {
- case '(':
- case ')':
- case '\\':
- output.write('\\');
- output.write(b);
- break;
- default:
- output.write(b);
- break;
- }
- }
- output.write(')');
- } else {
- // write hex string
- output.write('<');
- Hex.writeHexBytes(bytes, output);
- output.write('>');
- }
- }
}
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformer.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformer.java
index a02d09d..4e10cd9 100644
--- a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformer.java
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformer.java
@@ -16,28 +16,30 @@
*/
package org.apache.tika.fuzzing.pdf;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Collections;
+import java.util.Set;
+
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
+
import org.apache.tika.exception.TikaException;
import org.apache.tika.fuzzing.Transformer;
import org.apache.tika.fuzzing.exceptions.CantFuzzException;
import org.apache.tika.mime.MediaType;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.Collections;
-import java.util.Set;
-
public class PDFTransformer implements Transformer {
- private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.application("pdf"));
+ private static final Set<MediaType> SUPPORTED_TYPES =
+ Collections.singleton(MediaType.application("pdf"));
+ private final PDFTransformerConfig config = new PDFTransformerConfig();
+
@Override
public Set<MediaType> getSupportedTypes() {
return SUPPORTED_TYPES;
}
- private PDFTransformerConfig config = new PDFTransformerConfig();
-
@Override
public void transform(InputStream is, OutputStream os) throws IOException, TikaException {
try (PDDocument pdDocument = PDDocument.load(is)) {
diff --git a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformerConfig.java b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformerConfig.java
index 41f0daf..a494d4a 100644
--- a/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformerConfig.java
+++ b/tika-fuzzing/src/main/java/org/apache/tika/fuzzing/pdf/PDFTransformerConfig.java
@@ -16,9 +16,16 @@
*/
package org.apache.tika.fuzzing.pdf;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSName;
+
import org.apache.tika.fuzzing.Transformer;
import org.apache.tika.fuzzing.general.ByteDeleter;
import org.apache.tika.fuzzing.general.ByteFlipper;
@@ -27,12 +34,6 @@ import org.apache.tika.fuzzing.general.GeneralTransformer;
import org.apache.tika.fuzzing.general.SpanSwapper;
import org.apache.tika.fuzzing.general.Truncator;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Random;
-import java.util.Set;
-
public class PDFTransformerConfig {
private final Random random = new Random();
@@ -48,20 +49,19 @@ public class PDFTransformerConfig {
private Set<COSName> allowableFilters = new HashSet<>();
- private Transformer streamTransformer = new GeneralTransformer(1,
- new ByteDeleter(),
- new ByteFlipper(), new ByteInjector(), new SpanSwapper(), new Truncator());
+ private Transformer streamTransformer =
+ new GeneralTransformer(1, new ByteDeleter(), new ByteFlipper(), new ByteInjector(),
+ new SpanSwapper(), new Truncator());
- private Transformer unfilteredStreamTransformer = new GeneralTransformer(1,
- new ByteDeleter(),
- new ByteFlipper(), new ByteInjector(), new SpanSwapper(), new Truncator());
+ private Transformer unfilteredStreamTransformer =
+ new GeneralTransformer(1, new ByteDeleter(), new ByteFlipper(), new ByteInjector(),
+ new SpanSwapper(), new Truncator());
public float getRandomizeObjectNumbers() {
return randomizeObjectNumbers;
}
/**
- *
* @param randomizeObjectNumbers probability that a given object number will be randomized.
* If < 0, this will be ignored.
*/
@@ -69,8 +69,11 @@ public class PDFTransformerConfig {
this.randomizeObjectNumbers = randomizeObjectNumbers;
}
+ public float getRandomizeRefNumbers() {
+ return randomizeRefNumbers;
+ }
+
/**
- *
* @param randomizeRefNumbers probability that a given reference number will be randomized.
* If {@code <= 0}, no reference number is randomized.
*/
@@ -78,14 +81,20 @@ public class PDFTransformerConfig {
this.randomizeRefNumbers = randomizeRefNumbers;
}
- public float getRandomizeRefNumbers() {
- return randomizeRefNumbers;
- }
-
public Transformer getUnfilteredStreamTransformer() {
return unfilteredStreamTransformer;
}
+ /**
+ * This transformer is applied to the stream _before_ any filters
+ * are applied.
+ *
+ * @param transformer the transformer to apply to the stream before any filters are applied
+ */
+ public void setUnfilteredStreamTransformer(Transformer transformer) {
+ this.unfilteredStreamTransformer = transformer;
+ }
+
public Transformer getStreamTransformer() {
return streamTransformer;
}
@@ -100,16 +109,6 @@ public class PDFTransformerConfig {
}
/**
- * This transformer is applied to the stream _before_ any filters
- * are applied.
- * @param transformer
- */
- public void setUnfilteredStreamTransformer(Transformer transformer) {
- this.unfilteredStreamTransformer = transformer;
- }
-
- /**
- *
* @param maxFilters maximum number of filters to apply
*/
public void setMaxFilters(int maxFilters) {
@@ -118,6 +117,7 @@ public class PDFTransformerConfig {
/**
* Which filters are allowed
+ *
* @return the set of filters that are allowed
*/
public Set<COSName> getAllowableFilters() {
@@ -140,17 +140,17 @@ public class PDFTransformerConfig {
if (maxFilters < 0) {
List<COSName> ret = new ArrayList<>();
if (existingFilters instanceof COSArray) {
- for (COSBase obj : ((COSArray)existingFilters)) {
- ret.add((COSName)obj);
+ for (COSBase obj : ((COSArray) existingFilters)) {
+ ret.add((COSName) obj);
}
} else if (existingFilters instanceof COSName) {
- ret.add((COSName)existingFilters);
+ ret.add((COSName) existingFilters);
}
return ret;
}
int numFilters;
- if (maxFilters-minFilters == 0) {
+ if (maxFilters - minFilters == 0) {
numFilters = maxFilters;
} else {
numFilters = minFilters + random.nextInt(maxFilters - minFilters);
diff --git a/tika-fuzzing/src/test/java/TestFuzzingCLI.java b/tika-fuzzing/src/test/java/TestFuzzingCLI.java
index a98291b..b0a6f74 100644
--- a/tika-fuzzing/src/test/java/TestFuzzingCLI.java
+++ b/tika-fuzzing/src/test/java/TestFuzzingCLI.java
@@ -14,16 +14,18 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-import org.apache.commons.io.FileUtils;
-import org.apache.tika.fuzzing.cli.FuzzingCLI;
-import org.apache.tika.utils.ProcessUtils;
-import org.junit.Ignore;
-import org.junit.Test;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
+import org.apache.commons.io.FileUtils;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import org.apache.tika.fuzzing.cli.FuzzingCLI;
+import org.apache.tika.utils.ProcessUtils;
+
public class TestFuzzingCLI {
@Test
@@ -32,14 +34,10 @@ public class TestFuzzingCLI {
//convert to actual unit test
String inputDir = "";// fill in
String outputDir = "";//fill in
- String[] args = new String[] {
- "-i", inputDir,
- "-o", outputDir,
- "-n", "8", // num threads
+ String[] args = new String[]{"-i", inputDir, "-o", outputDir, "-n", "8", // num threads
"-t", "1", //max transformers
"-p", "100", //per file iterations
- "-r", "3"
- };
+ "-r", "3"};
FuzzingCLI.main(args);
}
@@ -49,15 +47,14 @@ public class TestFuzzingCLI {
//convert to actual unit test
Path inputDir = Paths.get(getClass().getResource("/test-documents").toURI());
Path outputDir = Files.createTempDirectory("tika-fuzzing-");
- String[] args = new String[] {
- "-i", ProcessUtils.escapeCommandLine(inputDir.toAbsolutePath().toString()),
- "-o", ProcessUtils.escapeCommandLine(outputDir.toAbsolutePath().toString()),
- "-n", "8", // num threads
+ String[] args = new String[]{"-i",
+ ProcessUtils.escapeCommandLine(inputDir.toAbsolutePath().toString()), "-o",
+ ProcessUtils.escapeCommandLine(outputDir.toAbsolutePath().toString()), "-n", "8",
+ // num threads
"-t", "0", //max transformers
"-p", "10", //per file iterations
"-m", "10000", //max ms per file
- "-r", "3"
- };
+ "-r", "3"};
try {
FuzzingCLI.main(args);
} finally {
diff --git a/tika-fuzzing/src/test/java/TestTransformer.java b/tika-fuzzing/src/test/java/TestTransformer.java
index 1db2e1e..8584133 100644
--- a/tika-fuzzing/src/test/java/TestTransformer.java
+++ b/tika-fuzzing/src/test/java/TestTransformer.java
@@ -14,9 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-import org.apache.tika.fuzzing.general.GeneralTransformer;
-import org.junit.Ignore;
-import org.junit.Test;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
@@ -25,6 +22,11 @@ import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import org.apache.tika.fuzzing.general.GeneralTransformer;
+
public class TestTransformer {
@Test
@@ -38,8 +40,7 @@ public class TestTransformer {
for (int i = 0; i < 100; i++) {
ByteArrayOutputStream bos = new ByteArrayOutputStream();
- transformer.transform(
- new ByteArrayInputStream(bytes), bos);
+ transformer.transform(new ByteArrayInputStream(bytes), bos);
if (Arrays.equals(bos.toByteArray(), bytes)) {
System.out.println("SAME");
diff --git a/tika-fuzzing/src/test/resources/log4j.properties b/tika-fuzzing/src/test/resources/log4j.properties
deleted file mode 100644
index 92b6d56..0000000
--- a/tika-fuzzing/src/test/resources/log4j.properties
+++ /dev/null
@@ -1,24 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#info,debug, error,fatal ...
-log4j.rootLogger=debug,stderr
-
-#console
-log4j.appender.stderr=org.apache.log4j.ConsoleAppender
-log4j.appender.stderr.layout=org.apache.log4j.PatternLayout
-log4j.appender.stderr.Target=System.err
-
-log4j.appender.stderr.layout.ConversionPattern= %-5p %m%n
diff --git a/tika-fuzzing/src/test/resources/test-documents/null_pointer.xml b/tika-fuzzing/src/test/resources/log4j2.xml
similarity index 72%
copy from tika-fuzzing/src/test/resources/test-documents/null_pointer.xml
copy to tika-fuzzing/src/test/resources/log4j2.xml
index 4561c3a..611f36d 100644
--- a/tika-fuzzing/src/test/resources/test-documents/null_pointer.xml
+++ b/tika-fuzzing/src/test/resources/log4j2.xml
@@ -1,4 +1,5 @@
-<?xml version="1.0" encoding="UTF-8" ?>
+<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
+
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
@@ -17,9 +18,15 @@
specific language governing permissions and limitations
under the License.
-->
-
-<mock>
- <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
- <write element="p">some content</write>
- <throw class="java.lang.NullPointerException">another null pointer exception</throw>
-</mock>
\ No newline at end of file
+<Configuration status="WARN">
+ <Appenders>
+ <Console name="Console" target="SYSTEM_ERR">
+ <PatternLayout pattern="%-5p [%t] %d{HH:mm:ss,SSS} %c %m%n"/>
+ </Console>
+ </Appenders>
+ <Loggers>
+ <Root level="debug">
+ <AppenderRef ref="Console"/>
+ </Root>
+ </Loggers>
+</Configuration>
diff --git a/tika-fuzzing/src/test/resources/test-documents/heavy_hang.xml b/tika-fuzzing/src/test/resources/test-documents/heavy_hang.xml
index f1f5b67..c9e028a 100644
--- a/tika-fuzzing/src/test/resources/test-documents/heavy_hang.xml
+++ b/tika-fuzzing/src/test/resources/test-documents/heavy_hang.xml
@@ -19,7 +19,7 @@
-->
<mock>
- <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
- <write element="p">some content</write>
- <hang millis="30000" heavy="true" pulse_millis="100" />
+ <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
+ <write element="p">some content</write>
+ <hang millis="30000" heavy="true" pulse_millis="100"/>
</mock>
\ No newline at end of file
diff --git a/tika-fuzzing/src/test/resources/test-documents/null_pointer.xml b/tika-fuzzing/src/test/resources/test-documents/null_pointer.xml
index 4561c3a..e497da5 100644
--- a/tika-fuzzing/src/test/resources/test-documents/null_pointer.xml
+++ b/tika-fuzzing/src/test/resources/test-documents/null_pointer.xml
@@ -19,7 +19,7 @@
-->
<mock>
- <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
- <write element="p">some content</write>
- <throw class="java.lang.NullPointerException">another null pointer exception</throw>
+ <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
+ <write element="p">some content</write>
+ <throw class="java.lang.NullPointerException">another null pointer exception</throw>
</mock>
\ No newline at end of file
diff --git a/tika-fuzzing/src/test/resources/test-documents/system_exit.xml b/tika-fuzzing/src/test/resources/test-documents/system_exit.xml
index 75d1d3b..52feede 100644
--- a/tika-fuzzing/src/test/resources/test-documents/system_exit.xml
+++ b/tika-fuzzing/src/test/resources/test-documents/system_exit.xml
@@ -19,7 +19,7 @@
-->
<mock>
- <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
- <write element="p">some content</write>
- <system_exit />
+ <metadata action="add" name="author">Nikolai Lobachevsky</metadata>
+ <write element="p">some content</write>
+ <system_exit/>
</mock>
\ No newline at end of file