You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/05/03 18:09:20 UTC

[tika] branch main updated: Add an interface for rendering engines (#555)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new a02c195d1 Add an interface for rendering engines (#555)
a02c195d1 is described below

commit a02c195d162e165c9d3a9ff5938245b96a42bfd1
Author: Tim Allison <ta...@apache.org>
AuthorDate: Tue May 3 14:09:14 2022 -0400

    Add an interface for rendering engines (#555)
    
    * TIKA-3571 -- add an interface for rendering engines
---
 .../java/org/apache/tika/config/TikaConfig.java    | 139 ++++++++++++--
 .../tika/extractor/EmbeddedDocumentUtil.java       |   4 +-
 .../java/org/apache/tika/metadata/Rendering.java   |  27 +++
 .../apache/tika/metadata/TikaCoreProperties.java   |   5 +-
 .../java/org/apache/tika/parser/DefaultParser.java |  34 +++-
 .../org/apache/tika/parser/RenderingParser.java    |  24 +++
 .../apache/tika/renderer/CompositeRenderer.java    | 102 ++++++++++
 .../tika/renderer/PageBasedRenderResults.java      |  50 +++++
 .../org/apache/tika/renderer/PageRangeRequest.java |  60 ++++++
 .../org/apache/tika/renderer/RenderRequest.java    |  27 +++
 .../org/apache/tika/renderer/RenderResult.java     |  62 ++++++
 .../org/apache/tika/renderer/RenderResults.java    |  53 ++++++
 .../java/org/apache/tika/renderer/Renderer.java    |  62 ++++++
 .../org/apache/tika/renderer/RenderingState.java   |  26 +++
 .../org/apache/tika/renderer/RenderingTracker.java |  31 +++
 .../tika-parser-pdf-module/pom.xml                 |   7 +
 .../apache/tika/parser/pdf/AbstractPDF2XHTML.java  | 212 +++++++++++++++------
 .../java/org/apache/tika/parser/pdf/OCR2XHTML.java |  15 +-
 .../java/org/apache/tika/parser/pdf/PDF2XHTML.java |  18 +-
 .../tika/parser/pdf/PDFMarkedContent2XHTML.java    |  13 +-
 .../java/org/apache/tika/parser/pdf/PDFParser.java | 163 +++++++++++++---
 .../apache/tika/parser/pdf/PDFParserConfig.java    | 212 +++++++++++----------
 .../tika/parser/pdf/TextOnlyPDFRenderer.java       | 106 +++++++++++
 .../apache/tika/renderer/pdf/MuPDFRenderer.java    | 149 +++++++++++++++
 .../tika/renderer/pdf/PDDocumentRenderer.java      |  27 +++
 .../apache/tika/renderer/pdf/PDFBoxRenderer.java   | 198 +++++++++++++++++++
 .../tika/renderer/pdf/PDFRenderingState.java       |  45 +++++
 .../apache/tika/parser/pdf/PDFRenderingTest.java   | 109 +++++++++++
 .../tika/parser/pdf/tika-rendering-config.xml      |  25 +++
 .../apache/tika/parser/crypto/TSDParserTest.java   |   8 +-
 .../org/apache/tika/parser/pdf/PDFParserTest.java  |  27 +++
 .../configs/tika-rendering-mupdf-config.xml        |  25 +++
 32 files changed, 1837 insertions(+), 228 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index 1606262f6..e7c212f87 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@ -74,7 +74,10 @@ import org.apache.tika.parser.CompositeParser;
 import org.apache.tika.parser.DefaultParser;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.ParserDecorator;
+import org.apache.tika.parser.RenderingParser;
 import org.apache.tika.parser.multiple.AbstractMultipleParser;
+import org.apache.tika.renderer.CompositeRenderer;
+import org.apache.tika.renderer.Renderer;
 import org.apache.tika.utils.AnnotationUtils;
 import org.apache.tika.utils.XMLReaderUtils;
 
@@ -95,6 +98,7 @@ public class TikaConfig {
     private final MimeTypes mimeTypes;
     private final ExecutorService executorService;
     private final EncodingDetector encodingDetector;
+    private final Renderer renderer;
     private final MetadataFilter metadataFilter;
     private final AutoDetectParserConfig autoDetectParserConfig;
 
@@ -155,12 +159,14 @@ public class TikaConfig {
         TranslatorXmlLoader translatorLoader = new TranslatorXmlLoader();
         ExecutorServiceXmlLoader executorLoader = new ExecutorServiceXmlLoader();
         EncodingDetectorXmlLoader encodingDetectorXmlLoader = new EncodingDetectorXmlLoader();
+        RendererXmlLoader rendererXmlLoader = new RendererXmlLoader();
         updateXMLReaderUtils(element);
         this.mimeTypes = typesFromDomElement(element);
         this.detector = detectorLoader.loadOverall(element, mimeTypes, loader);
         this.encodingDetector = encodingDetectorXmlLoader.loadOverall(element, mimeTypes, loader);
+        this.renderer = rendererXmlLoader.loadOverall(element, mimeTypes, loader);
 
-        ParserXmlLoader parserLoader = new ParserXmlLoader(encodingDetector);
+        ParserXmlLoader parserLoader = new ParserXmlLoader(encodingDetector, renderer);
         this.parser = parserLoader.loadOverall(element, mimeTypes, loader);
         this.translator = translatorLoader.loadOverall(element, mimeTypes, loader);
         this.executorService = executorLoader.loadOverall(element, mimeTypes, loader);
@@ -187,7 +193,8 @@ public class TikaConfig {
         this.mimeTypes = getDefaultMimeTypes(loader);
         this.detector = getDefaultDetector(mimeTypes, serviceLoader);
         this.encodingDetector = getDefaultEncodingDetector(serviceLoader);
-        this.parser = getDefaultParser(mimeTypes, serviceLoader, encodingDetector);
+        this.renderer = getDefaultRenderer(serviceLoader);
+        this.parser = getDefaultParser(mimeTypes, serviceLoader, encodingDetector, renderer);
         this.translator = getDefaultTranslator(serviceLoader);
         this.executorService = getDefaultExecutorService();
         this.metadataFilter = new NoOpFilter();
@@ -223,7 +230,8 @@ public class TikaConfig {
             this.serviceLoader = new ServiceLoader();
             this.mimeTypes = getDefaultMimeTypes(getContextClassLoader());
             this.encodingDetector = getDefaultEncodingDetector(serviceLoader);
-            this.parser = getDefaultParser(mimeTypes, serviceLoader, encodingDetector);
+            this.renderer = getDefaultRenderer(serviceLoader);
+            this.parser = getDefaultParser(mimeTypes, serviceLoader, encodingDetector, renderer);
             this.detector = getDefaultDetector(mimeTypes, serviceLoader);
             this.translator = getDefaultTranslator(serviceLoader);
             this.executorService = getDefaultExecutorService();
@@ -237,6 +245,7 @@ public class TikaConfig {
                 serviceLoader = serviceLoaderFromDomElement(element, tmpServiceLoader.getLoader());
                 DetectorXmlLoader detectorLoader = new DetectorXmlLoader();
                 EncodingDetectorXmlLoader encodingDetectorLoader = new EncodingDetectorXmlLoader();
+                RendererXmlLoader rendererLoader = new RendererXmlLoader();
                 TranslatorXmlLoader translatorLoader = new TranslatorXmlLoader();
                 ExecutorServiceXmlLoader executorLoader = new ExecutorServiceXmlLoader();
 
@@ -244,8 +253,9 @@ public class TikaConfig {
                 this.encodingDetector =
                         encodingDetectorLoader.loadOverall(element, mimeTypes, serviceLoader);
 
+                this.renderer = rendererLoader.loadOverall(element, mimeTypes, serviceLoader);
 
-                ParserXmlLoader parserLoader = new ParserXmlLoader(encodingDetector);
+                ParserXmlLoader parserLoader = new ParserXmlLoader(encodingDetector, renderer);
                 this.parser = parserLoader.loadOverall(element, mimeTypes, serviceLoader);
                 this.detector = detectorLoader.loadOverall(element, mimeTypes, serviceLoader);
                 this.translator = translatorLoader.loadOverall(element, mimeTypes, serviceLoader);
@@ -273,9 +283,12 @@ public class TikaConfig {
         return new DefaultEncodingDetector(loader);
     }
 
+    protected static CompositeRenderer getDefaultRenderer(ServiceLoader loader) {
+        return new CompositeRenderer(loader);
+    }
     private static CompositeParser getDefaultParser(MimeTypes types, ServiceLoader loader,
-                                                    EncodingDetector encodingDetector) {
-        return new DefaultParser(types.getMediaTypeRegistry(), loader, encodingDetector);
+                                                    EncodingDetector encodingDetector, Renderer renderer) {
+        return new DefaultParser(types.getMediaTypeRegistry(), loader, encodingDetector, renderer);
     }
 
     private static Translator getDefaultTranslator(ServiceLoader loader) {
@@ -811,9 +824,11 @@ public class TikaConfig {
     private static class ParserXmlLoader extends XmlLoader<CompositeParser, Parser> {
 
         private final EncodingDetector encodingDetector;
+        private final Renderer renderer;
 
-        private ParserXmlLoader(EncodingDetector encodingDetector) {
+        private ParserXmlLoader(EncodingDetector encodingDetector, Renderer renderer) {
             this.encodingDetector = encodingDetector;
+            this.renderer = renderer;
         }
 
         boolean supportsComposite() {
@@ -860,7 +875,7 @@ public class TikaConfig {
 
         @Override
         CompositeParser createDefault(MimeTypes mimeTypes, ServiceLoader loader) {
-            return getDefaultParser(mimeTypes, loader, encodingDetector);
+            return getDefaultParser(mimeTypes, loader, encodingDetector, renderer);
         }
 
         @Override
@@ -880,6 +895,15 @@ public class TikaConfig {
             MediaTypeRegistry registry = mimeTypes.getMediaTypeRegistry();
 
             // Try the possible default and composite parser constructors
+            if (parser == null) {
+                try {
+                    c = parserClass.getConstructor(MediaTypeRegistry.class, ServiceLoader.class,
+                            Collection.class, EncodingDetector.class, Renderer.class);
+                    parser = c.newInstance(registry, loader, excludeParsers, encodingDetector, renderer);
+                } catch (NoSuchMethodException me) {
+                    //swallow
+                }
+            }
             if (parser == null) {
                 try {
                     c = parserClass.getConstructor(MediaTypeRegistry.class, ServiceLoader.class,
@@ -948,12 +972,18 @@ public class TikaConfig {
         Parser newInstance(Class<? extends Parser> loadedClass)
                 throws IllegalAccessException, InstantiationException, NoSuchMethodException,
                 InvocationTargetException {
+            Parser parser = null;
             if (AbstractEncodingDetectorParser.class.isAssignableFrom(loadedClass)) {
                 Constructor ctor = loadedClass.getConstructor(EncodingDetector.class);
-                return (Parser) ctor.newInstance(encodingDetector);
+                parser = (Parser) ctor.newInstance(encodingDetector);
             } else {
-                return loadedClass.newInstance();
+                parser = loadedClass.newInstance();
             }
+
+            if (parser instanceof RenderingParser) {
+                ((RenderingParser)parser).setRenderer(renderer);
+            }
+            return parser;
         }
 
         @Override
@@ -1306,7 +1336,7 @@ public class TikaConfig {
                     c = encodingDetectorClass.getConstructor(List.class);
                     encodingDetector = c.newInstance(childEncodingDetectors);
                 } catch (NoSuchMethodException me) {
-                    LOG.debug("couldn't find constructor for EncodingDetecto(List) for {}",
+                    LOG.debug("couldn't find constructor for EncodingDetector(List) for {}",
                             encodingDetectorClass);
                 }
             }
@@ -1320,4 +1350,91 @@ public class TikaConfig {
         }
     }
 
+    private static class RendererXmlLoader
+            extends XmlLoader<Renderer, Renderer> {
+
+        boolean supportsComposite() {
+            return true;
+        }
+
+        String getParentTagName() {
+            return "renderers";
+        }
+
+        String getLoaderTagName() {
+            return "renderer";
+        }
+
+        @Override
+        Class<? extends Renderer> getLoaderClass() {
+            return Renderer.class;
+        }
+
+
+        @Override
+        boolean isComposite(Renderer loaded) {
+            return loaded instanceof CompositeRenderer;
+        }
+
+        @Override
+        boolean isComposite(Class<? extends Renderer> loadedClass) {
+            return CompositeRenderer.class.isAssignableFrom(loadedClass);
+        }
+
+        @Override
+        Renderer preLoadOne(Class<? extends Renderer> loadedClass, String classname,
+                                    MimeTypes mimeTypes) throws TikaException {
+            // Check for classes which can't be set in config
+            // Continue with normal loading
+            return null;
+        }
+
+        @Override
+        Renderer createDefault(MimeTypes mimeTypes, ServiceLoader loader) {
+            return getDefaultRenderer(loader);
+        }
+
+        @Override
+        Renderer createComposite(List<Renderer> renderers,
+                                                  MimeTypes mimeTypes, ServiceLoader loader) {
+            return new CompositeRenderer(renderers);
+        }
+
+        @Override
+        Renderer createComposite(Class<? extends Renderer> rendererClass,
+                                         List<Renderer> childRenderers,
+                                         Set<Class<? extends Renderer>> excludeRenderers,
+                                         Map<String, Param> params, MimeTypes mimeTypes,
+                                         ServiceLoader loader)
+                throws InvocationTargetException, IllegalAccessException, InstantiationException {
+            Renderer renderer = null;
+            Constructor<? extends Renderer> c;
+
+            // Try the possible default and composite renderer constructors
+            if (renderer == null) {
+                try {
+                    c = rendererClass.getConstructor(ServiceLoader.class, Collection.class);
+                    renderer = c.newInstance(loader, excludeRenderers);
+                } catch (NoSuchMethodException me) {
+                    LOG.debug("couldn't find constructor for service loader + collection for {}",
+                            renderer);
+                }
+            }
+            if (renderer == null) {
+                try {
+                    c = rendererClass.getConstructor(List.class);
+                    renderer = c.newInstance(childRenderers);
+                } catch (NoSuchMethodException me) {
+                    LOG.debug("couldn't find constructor for Renderer(List) for {}",
+                            rendererClass);
+                }
+            }
+            return renderer;
+        }
+
+        @Override
+        Renderer decorate(Renderer created, Element element) {
+            return created; // No decoration of Renderers
+        }
+    }
 }
diff --git a/tika-core/src/main/java/org/apache/tika/extractor/EmbeddedDocumentUtil.java b/tika-core/src/main/java/org/apache/tika/extractor/EmbeddedDocumentUtil.java
index 5854aba28..ccac4f1db 100644
--- a/tika-core/src/main/java/org/apache/tika/extractor/EmbeddedDocumentUtil.java
+++ b/tika-core/src/main/java/org/apache/tika/extractor/EmbeddedDocumentUtil.java
@@ -92,7 +92,9 @@ public class EmbeddedDocumentUtil implements Serializable {
                 context.set(Parser.class, new AutoDetectParser(tikaConfig));
             }
         }
-        return new ParsingEmbeddedDocumentExtractor(context);
+        EmbeddedDocumentExtractor ex = new ParsingEmbeddedDocumentExtractor(context);
+        context.set(EmbeddedDocumentExtractor.class, ex);
+        return ex;
     }
 
     /**
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/Rendering.java b/tika-core/src/main/java/org/apache/tika/metadata/Rendering.java
new file mode 100644
index 000000000..73788fef3
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/metadata/Rendering.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.tika.metadata;
+
+public interface Rendering {
+    String RENDERING_PREFIX = "rendering:";
+
+    Property PAGE_NUMBER = Property.externalInteger(RENDERING_PREFIX + "page_number");
+    Property RENDERED_BY = Property.externalTextBag(RENDERING_PREFIX + "Rendered-By");
+    Property RENDERED_MS = Property.externalReal(RENDERING_PREFIX + "rendering-time-ms");
+}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
index ba138c54f..21581a482 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
@@ -277,7 +277,7 @@ public interface TikaCoreProperties {
     Property EMBEDDED_RESOURCE_TYPE = Property.internalClosedChoise(EMBEDDED_RESOURCE_TYPE_KEY,
             EmbeddedResourceType.ATTACHMENT.toString(), EmbeddedResourceType.INLINE.toString(),
             EmbeddedResourceType.METADATA.toString(), EmbeddedResourceType.MACRO.toString(),
-            EmbeddedResourceType.THUMBNAIL.toString());
+            EmbeddedResourceType.THUMBNAIL.toString(), EmbeddedResourceType.RENDERING.toString());
     Property HAS_SIGNATURE = Property.internalBoolean("hasSignature");
 
 
@@ -302,6 +302,7 @@ public interface TikaCoreProperties {
         MACRO, //any code that is intended to be run by the application
         METADATA, //e.g. xmp, xfa
         FONT,//embedded font files
-        THUMBNAIL//TODO: set this in parsers that handle thumbnails
+        THUMBNAIL, //TODO: set this in parsers that handle thumbnails
+        RENDERING //if a file has been rendered
     }
 }
diff --git a/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java b/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java
index 2abeeed52..336adee93 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java
@@ -27,6 +27,8 @@ import org.apache.tika.detect.DefaultEncodingDetector;
 import org.apache.tika.detect.EncodingDetector;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MediaTypeRegistry;
+import org.apache.tika.renderer.CompositeRenderer;
+import org.apache.tika.renderer.Renderer;
 import org.apache.tika.utils.ServiceLoaderUtils;
 
 /**
@@ -46,25 +48,27 @@ public class DefaultParser extends CompositeParser {
 
     public DefaultParser(MediaTypeRegistry registry, ServiceLoader loader,
                          Collection<Class<? extends Parser>> excludeParsers,
-                         EncodingDetector encodingDetector) {
-        super(registry, getDefaultParsers(loader, encodingDetector, excludeParsers));
+                         EncodingDetector encodingDetector, Renderer renderer) {
+        super(registry, getDefaultParsers(loader, encodingDetector, renderer, excludeParsers));
         this.loader = loader;
     }
 
     public DefaultParser(MediaTypeRegistry registry, ServiceLoader loader,
                          Collection<Class<? extends Parser>> excludeParsers) {
         super(registry,
-                getDefaultParsers(loader, new DefaultEncodingDetector(loader), excludeParsers));
+                getDefaultParsers(loader, new DefaultEncodingDetector(loader),
+                        new CompositeRenderer(loader), excludeParsers));
         this.loader = loader;
     }
 
     public DefaultParser(MediaTypeRegistry registry, ServiceLoader loader,
-                         EncodingDetector encodingDetector) {
-        this(registry, loader, Collections.EMPTY_SET, encodingDetector);
+                         EncodingDetector encodingDetector, Renderer renderer) {
+        this(registry, loader, Collections.EMPTY_SET, encodingDetector, renderer);
     }
 
     public DefaultParser(MediaTypeRegistry registry, ServiceLoader loader) {
-        this(registry, loader, Collections.EMPTY_SET, new DefaultEncodingDetector(loader));
+        this(registry, loader, Collections.EMPTY_SET, new DefaultEncodingDetector(loader),
+                new CompositeRenderer(loader));
     }
 
     public DefaultParser(MediaTypeRegistry registry, ClassLoader loader) {
@@ -94,6 +98,7 @@ public class DefaultParser extends CompositeParser {
      */
     private static List<Parser> getDefaultParsers(ServiceLoader loader,
                                                   EncodingDetector encodingDetector,
+                                                  Renderer renderer,
                                                   Collection<Class<? extends Parser>>
                                                           excludeParsers) {
         List<Parser> parsers =
@@ -104,6 +109,11 @@ public class DefaultParser extends CompositeParser {
                 setEncodingDetector(p, encodingDetector);
             }
         }
+        if (renderer != null) {
+            for (Parser p : parsers) {
+                setRenderer(p, renderer);
+            }
+        }
         ServiceLoaderUtils.sortLoadedClasses(parsers);
         return parsers;
     }
@@ -122,6 +132,18 @@ public class DefaultParser extends CompositeParser {
         }
     }
 
+    private static void setRenderer(Parser p, Renderer renderer) {
+        if (p instanceof RenderingParser) {
+            ((RenderingParser) p).setRenderer(renderer);
+        } else if (p instanceof CompositeParser) {
+            for (Parser child : ((CompositeParser) p).getAllComponentParsers()) {
+                setRenderer(child, renderer);
+            }
+        } else if (p instanceof ParserDecorator) {
+            setRenderer(((ParserDecorator) p).getWrappedParser(), renderer);
+        }
+    }
+
     @Override
     public Map<MediaType, Parser> getParsers(ParseContext context) {
         Map<MediaType, Parser> map = super.getParsers(context);
diff --git a/tika-core/src/main/java/org/apache/tika/parser/RenderingParser.java b/tika-core/src/main/java/org/apache/tika/parser/RenderingParser.java
new file mode 100644
index 000000000..0daae6be1
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/parser/RenderingParser.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */package org.apache.tika.parser;
+
+import org.apache.tika.renderer.Renderer;
+
+public interface RenderingParser {
+
+    void setRenderer(Renderer renderer);
+
+}
diff --git a/tika-core/src/main/java/org/apache/tika/renderer/CompositeRenderer.java b/tika-core/src/main/java/org/apache/tika/renderer/CompositeRenderer.java
new file mode 100644
index 000000000..a98d39c97
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/renderer/CompositeRenderer.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */package org.apache.tika.renderer;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.tika.config.Initializable;
+import org.apache.tika.config.InitializableProblemHandler;
+import org.apache.tika.config.Param;
+import org.apache.tika.config.ServiceLoader;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.utils.ServiceLoaderUtils;
+
+public class CompositeRenderer implements Renderer, Initializable {
+
+    private Map<MediaType, Renderer> rendererMap = new HashMap<>();
+
+    public CompositeRenderer(ServiceLoader serviceLoader) {
+        this(getDefaultRenderers(serviceLoader));
+    }
+
+    public CompositeRenderer(List<Renderer> renderers) {
+        Map<MediaType, Renderer> tmp = new ConcurrentHashMap<>();
+        ParseContext empty = new ParseContext();
+        for (Renderer renderer : renderers) {
+            for (MediaType mt : renderer.getSupportedTypes(empty)) {
+                tmp.put(mt, renderer);
+            }
+        }
+        rendererMap = Collections.unmodifiableMap(tmp);
+    }
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return rendererMap.keySet();
+    }
+
+    @Override
+    public RenderResults render(InputStream is, Metadata metadata, ParseContext parseContext,
+                                RenderRequest... requests) throws IOException, TikaException {
+
+        String mediaTypeString = metadata.get(TikaCoreProperties.TYPE);
+        if (mediaTypeString == null) {
+            throw new TikaException("need to specify file type in metadata");
+        }
+        MediaType mt = MediaType.parse(mediaTypeString);
+        if (mt == null) {
+            throw new TikaException("can't parse mediaType: " + mediaTypeString);
+        }
+        Renderer renderer = rendererMap.get(mt);
+        if (renderer == null) {
+            throw new TikaException("I regret I can't find a renderer for " + mt);
+        }
+        return renderer.render(is, metadata, parseContext, requests);
+    }
+
+    public Renderer getLeafRenderer(MediaType mt) {
+        return rendererMap.get(mt);
+    }
+    @Override
+    public void initialize(Map<String, Param> params) throws TikaConfigException {
+
+    }
+
+    @Override
+    public void checkInitialization(InitializableProblemHandler problemHandler)
+            throws TikaConfigException {
+
+    }
+
+    private static List<Renderer> getDefaultRenderers(ServiceLoader loader) {
+        List<Renderer> staticRenderers =
+                loader.loadStaticServiceProviders(Renderer.class);
+
+        ServiceLoaderUtils.sortLoadedClasses(staticRenderers);
+        return staticRenderers;
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/renderer/PageBasedRenderResults.java b/tika-core/src/main/java/org/apache/tika/renderer/PageBasedRenderResults.java
new file mode 100644
index 000000000..0c238b60d
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/renderer/PageBasedRenderResults.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.renderer;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.metadata.Rendering;
+
+public class PageBasedRenderResults extends RenderResults {
+
+    Map<Integer, List<RenderResult>> results = new HashMap<>();
+
+    public PageBasedRenderResults(TemporaryResources tmp) {
+        super(tmp);
+    }
+    public void add(RenderResult result) {
+        Integer page = result.getMetadata().getInt(Rendering.PAGE_NUMBER);
+        if (page != null) {
+            List<RenderResult> pageResults = results.get(page);
+            if (pageResults == null) {
+                pageResults = new ArrayList<>();
+                results.put(page, pageResults);
+            }
+            pageResults.add(result);
+        }
+        super.add(result);
+    }
+
+    public List<RenderResult> getPage(int pageNumber) {
+        return results.get(pageNumber);
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/renderer/PageRangeRequest.java b/tika-core/src/main/java/org/apache/tika/renderer/PageRangeRequest.java
new file mode 100644
index 000000000..2534d7032
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/renderer/PageRangeRequest.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.renderer;
+
+import java.util.Objects;
+
+/**
+ * The range of pages to render.  These are 1-based, and "to" is inclusive.
+ * Instances are immutable and compare by value.
+ */
+public class PageRangeRequest implements RenderRequest {
+
+    /**
+     * Shared request for rendering every page, expressed as from=1, to=-1.
+     * NOTE(review): the -1 sentinel presumably means "through the last page";
+     * confirm against the renderer implementations.
+     */
+    public static final PageRangeRequest RENDER_ALL = new PageRangeRequest(1, -1);
+
+    private final int from;
+    private final int to;
+
+    /**
+     * @param from first page to render (1-based)
+     * @param to last page to render (1-based, inclusive)
+     */
+    public PageRangeRequest(int from, int to) {
+        this.from = from;
+        this.to = to;
+    }
+
+    /**
+     * @return the first page of the range (1-based)
+     */
+    public int getFrom() {
+        return from;
+    }
+
+    /**
+     * @return the last page of the range (1-based, inclusive)
+     */
+    public int getTo() {
+        return to;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+        PageRangeRequest that = (PageRangeRequest) o;
+        return from == that.from && to == that.to;
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(from, to);
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/renderer/RenderRequest.java b/tika-core/src/main/java/org/apache/tika/renderer/RenderRequest.java
new file mode 100644
index 000000000..3277d866a
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/renderer/RenderRequest.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.renderer;
+
+/**
+ * Empty marker interface for requests to a renderer. Different
+ * file formats and different use cases will have different types of requests.
+ * For page-based formats, it could be a page range (render the full pages from 2 to 5),
+ * or it could be a single page with an x-y bounding box.  For video files,
+ * it could be a temporal offset or a temporal offset with an x-y bounding box.
+ */
+public interface RenderRequest {
+}
diff --git a/tika-core/src/main/java/org/apache/tika/renderer/RenderResult.java b/tika-core/src/main/java/org/apache/tika/renderer/RenderResult.java
new file mode 100644
index 000000000..888b0dd4c
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/renderer/RenderResult.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.renderer;
+
+import java.nio.file.Path;
+
+import org.apache.tika.metadata.Metadata;
+
+/**
+ * Immutable holder for the outcome of a single rendering: its status, an id,
+ * the path of the rendered file and the metadata describing it.
+ */
+public class RenderResult {
+
+    /**
+     * Outcome of a rendering attempt.
+     */
+    public enum STATUS {
+        SUCCESS, EXCEPTION, TIMEOUT
+    }
+
+    private final STATUS status;
+    private final int id;
+    private final Path path;
+    //TODO: we're relying on metadata to bring in a bunch of info.
+    //Might be cleaner to add specific parameters for page number, embedded path, etc.?
+    private final Metadata metadata;
+
+    /**
+     * @param status outcome of the rendering
+     * @param id id of this rendering
+     * @param path location of the rendered file
+     * @param metadata metadata describing the rendering
+     */
+    public RenderResult(STATUS status, int id, Path path, Metadata metadata) {
+        this.status = status;
+        this.id = id;
+        this.path = path;
+        this.metadata = metadata;
+    }
+
+    /**
+     * @return outcome of the rendering
+     */
+    public STATUS getStatus() {
+        return this.status;
+    }
+
+    /**
+     * @return id of this rendering
+     */
+    public int getId() {
+        return this.id;
+    }
+
+    /**
+     * @return location of the rendered file, as passed to the constructor
+     */
+    public Path getPath() {
+        return this.path;
+    }
+
+    /**
+     * @return metadata describing the rendering
+     */
+    public Metadata getMetadata() {
+        return this.metadata;
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/renderer/RenderResults.java b/tika-core/src/main/java/org/apache/tika/renderer/RenderResults.java
new file mode 100644
index 000000000..12d60d3da
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/renderer/RenderResults.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.renderer;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.tika.io.TemporaryResources;
+
+/**
+ * Collection of {@link RenderResult}s whose rendered files are deleted when
+ * this object is closed.  Deletion is delegated to the
+ * {@link TemporaryResources} passed to the constructor; closing this object
+ * closes those resources.
+ */
+public class RenderResults implements Closeable {
+
+    private final List<RenderResult> results = new ArrayList<>();
+
+    private final TemporaryResources tmp;
+
+    public RenderResults(TemporaryResources tmp) {
+        this.tmp = tmp;
+    }
+
+    /**
+     * Records the result and, if it has a file, schedules that file for
+     * deletion on {@link #close()}.
+     *
+     * @param result result to add
+     */
+    public void add(RenderResult result) {
+        //failed renderings carry no file; registering a delete for a null
+        //path would throw a NullPointerException at close time
+        if (result.getPath() != null) {
+            tmp.addResource(new Closeable() {
+                @Override
+                public void close() throws IOException {
+                    //tolerate a file that has already been removed elsewhere
+                    Files.deleteIfExists(result.getPath());
+                }
+            });
+        }
+        results.add(result);
+    }
+
+    /**
+     * @return the live list of results added so far, in insertion order
+     */
+    public List<RenderResult> getResults() {
+        return results;
+    }
+
+    /**
+     * Closes the underlying temporary resources, which runs the file
+     * deletions registered by {@link #add(RenderResult)}.
+     *
+     * @throws IOException if closing the temporary resources fails
+     */
+    @Override
+    public void close() throws IOException {
+        tmp.close();
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/renderer/Renderer.java b/tika-core/src/main/java/org/apache/tika/renderer/Renderer.java
new file mode 100644
index 000000000..bc4261f52
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/renderer/Renderer.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.renderer;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Serializable;
+import java.util.Set;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+
+/**
+ * Interface for a renderer.  This should be flexible enough to run on the initial design: PDF pages
+ * but also on portions of PDF pages as well as on other document types.
+ *
+ */
+public interface Renderer extends Serializable {
+
+
+
+    /**
+     * Returns the set of media types supported by this renderer when used
+     * with the given parse context.
+     *
+     * @param context parse context
+     * @return immutable set of media types
+     * @since Apache Tika 2.5.0
+     */
+    Set<MediaType> getSupportedTypes(ParseContext context);
+
+    /**
+     * Renders the document in the given stream according to the given requests.
+     * NOTE(review): how multiple requests map onto the returned results, and who
+     * is responsible for closing the returned {@link RenderResults}, is not
+     * specified by this interface -- confirm against the implementations.
+     *
+     * @param is stream containing the document to render
+     * @param metadata metadata associated with the document
+     * @param parseContext parse context
+     * @param requests what to render, e.g. a {@link PageRangeRequest}
+     * @return the results of the rendering
+     * @throws IOException on a problem reading the stream or writing the rendering
+     * @throws TikaException on a failure in the renderer
+     */
+    RenderResults render(InputStream is, Metadata metadata, ParseContext parseContext,
+                         RenderRequest ... requests) throws IOException,
+            TikaException;
+
+    /*
+    At some point, we might need/want to add something like this, where for a given
+    page the requestor or the parser determines that they only want to render e.g. a
+    box within a page.
+
+    RenderResults render(InputStream is, int page, Coordinates coordinates, Metadata metadata,
+                         ParseContext parseContext) throws IOException,
+            TikaException;
+
+     */
+}
diff --git a/tika-core/src/main/java/org/apache/tika/renderer/RenderingState.java b/tika-core/src/main/java/org/apache/tika/renderer/RenderingState.java
new file mode 100644
index 000000000..ed8250065
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/renderer/RenderingState.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.renderer;
+
+/**
+ * Intended to track rendering state for each file (embedded or otherwise).
+ * It should be reset in the parseContext at the beginning of a parse
+ * and then replaced at the end of the parse.
+ */
+public class RenderingState {
+
+}
diff --git a/tika-core/src/main/java/org/apache/tika/renderer/RenderingTracker.java b/tika-core/src/main/java/org/apache/tika/renderer/RenderingTracker.java
new file mode 100644
index 000000000..49c775e69
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/renderer/RenderingTracker.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.renderer;
+
+/**
+ * Hands out unique ids for rendered images. Put one instance in the
+ * ParseContext and share it across the full parse of a main document
+ * and its embedded documents so that ids do not repeat.
+ */
+public class RenderingTracker {
+
+    //last id handed out; the first call to getNextId() returns 1
+    private int id = 0;
+
+    /**
+     * @return the next id in the sequence, starting at 1
+     */
+    public synchronized int getNextId() {
+        id += 1;
+        return id;
+    }
+}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/pom.xml b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/pom.xml
index 5440476bd..be05f67b1 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/pom.xml
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/pom.xml
@@ -74,6 +74,13 @@
       <artifactId>jaxb-runtime</artifactId>
       <version>${jaxb.version}</version>
     </dependency>
+    <!-- incompatible with Apache license, only use in testing -->
+    <dependency>
+      <groupId>com.github.jai-imageio</groupId>
+      <artifactId>jai-imageio-core</artifactId>
+      <version>${imageio.version}</version>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
 
   <build>
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
index 93dfbd119..344756dd0 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
@@ -92,7 +92,6 @@ import org.apache.pdfbox.text.PDFTextStripper;
 import org.apache.pdfbox.tools.imageio.ImageIOUtil;
 import org.apache.pdfbox.util.Matrix;
 import org.apache.pdfbox.util.Vector;
-import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.AttributesImpl;
 
@@ -105,10 +104,19 @@ import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Font;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.PDF;
+import org.apache.tika.metadata.Rendering;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
+import org.apache.tika.renderer.CompositeRenderer;
+import org.apache.tika.renderer.PageBasedRenderResults;
+import org.apache.tika.renderer.PageRangeRequest;
+import org.apache.tika.renderer.RenderResult;
+import org.apache.tika.renderer.Renderer;
+import org.apache.tika.renderer.RenderingTracker;
+import org.apache.tika.renderer.pdf.PDDocumentRenderer;
+import org.apache.tika.renderer.pdf.PDFRenderingState;
 import org.apache.tika.sax.BodyContentHandler;
 import org.apache.tika.sax.EmbeddedContentHandler;
 import org.apache.tika.sax.XHTMLContentHandler;
@@ -156,10 +164,10 @@ class AbstractPDF2XHTML extends PDFTextStripper {
     int unmappedUnicodeCharsPerPage = 0;
     int totalCharsPerPage = 0;
 
-    AbstractPDF2XHTML(PDDocument pdDocument, ContentHandler handler, ParseContext context,
+    AbstractPDF2XHTML(PDDocument pdDocument, XHTMLContentHandler xhtml, ParseContext context,
                       Metadata metadata, PDFParserConfig config) throws IOException {
         this.pdDocument = pdDocument;
-        this.xhtml = new XHTMLContentHandler(handler, metadata);
+        this.xhtml = xhtml;
         this.context = context;
         this.metadata = metadata;
         this.config = config;
@@ -293,9 +301,8 @@ class AbstractPDF2XHTML extends PDFTextStripper {
     private void parseMetadata(InputStream stream, Metadata embeddedMetadata)
             throws IOException, SAXException {
         try {
-            embeddedDocumentExtractor
-                    .parseEmbedded(stream, new EmbeddedContentHandler(xhtml), embeddedMetadata,
-                            true);
+            embeddedDocumentExtractor.parseEmbedded(stream, new EmbeddedContentHandler(xhtml),
+                    embeddedMetadata, true);
         } catch (IOException e) {
             handleCatchableIOE(e);
         }
@@ -318,8 +325,9 @@ class AbstractPDF2XHTML extends PDFTextStripper {
 
     }
 
-    private void extractFilesfromEFTree(PDNameTreeNode efTree, Map<String,
-            PDComplexFileSpecification> embeddedFileNames, int depth) throws IOException {
+    private void extractFilesfromEFTree(PDNameTreeNode efTree,
+                                        Map<String, PDComplexFileSpecification> embeddedFileNames,
+                                        int depth) throws IOException {
         if (depth > MAX_RECURSION_DEPTH) {
             throw new IOException("Hit max recursion depth");
         }
@@ -434,9 +442,8 @@ class AbstractPDF2XHTML extends PDFTextStripper {
         xhtml.endElement("div");
 
         try {
-            embeddedDocumentExtractor
-                    .parseEmbedded(stream, new EmbeddedContentHandler(xhtml), embeddedMetadata,
-                            false);
+            embeddedDocumentExtractor.parseEmbedded(stream, new EmbeddedContentHandler(xhtml),
+                    embeddedMetadata, false);
         } finally {
             IOUtils.closeQuietly(stream);
         }
@@ -477,43 +484,19 @@ class AbstractPDF2XHTML extends PDFTextStripper {
                                 "Please set the OCR_STRATEGY to NO_OCR or configure your" +
                                 "OCR parser correctly");
             } else if (ocrStrategy == AUTO) {
-                //silently skip
+                //silently skip if there's no parser to run ocr
                 return;
             }
         }
 
-        PDFRenderer renderer =
-                config.getOcrRenderingStrategy() == PDFParserConfig.OCR_RENDERING_STRATEGY.NO_TEXT ?
-                        new NoTextPDFRenderer(pdDocument) : new PDFRenderer(pdDocument);
-
         try (TemporaryResources tmp = new TemporaryResources()) {
-            int dpi = config.getOcrDPI();
-            Path tmpFile = null;
-            try {
-                BufferedImage image =
-                        renderer.renderImageWithDPI(pageIndex, dpi, config.getOcrImageType());
-                tmpFile = tmp.createTempFile();
-                try (OutputStream os = Files.newOutputStream(tmpFile)) {
-                    //TODO: get output format from TesseractConfig
-                    ImageIOUtil.writeImage(image, config.getOcrImageFormatName(), os, dpi,
-                            config.getOcrImageQuality());
-                }
-            } catch (SecurityException e) {
-                //throw SecurityExceptions immediately
-                throw e;
-            } catch (IOException | RuntimeException e) {
-                //image rendering can throw a variety of runtime exceptions, not just
-                // IOExceptions...
-                //need to have a wide catch
-                metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_EMBEDDED_STREAM,
-                        ExceptionUtils.getStackTrace(e));
-                return;
-            }
-            try (InputStream is = TikaInputStream.get(tmpFile)) {
-                metadata.set(TikaCoreProperties.CONTENT_TYPE_PARSER_OVERRIDE,
+            RenderResult renderResult = renderCurrentPage(context, tmp);
+            Metadata renderMetadata = renderResult.getMetadata();
+            try (InputStream is = TikaInputStream.get(renderResult.getPath())) {
+                renderMetadata.set(TikaCoreProperties.CONTENT_TYPE_PARSER_OVERRIDE,
                         ocrImageMediaType.toString());
                 ocrParser.parse(is, new EmbeddedContentHandler(new BodyContentHandler(xhtml)),
-                        metadata, context);
+                        renderMetadata, context);
             }
         } catch (IOException e) {
             handleCatchableIOE(e);
@@ -522,6 +505,121 @@ class AbstractPDF2XHTML extends PDFTextStripper {
         }
     }
 
+    /**
+     * Renders the current page to an image, reusing an earlier rendering of
+     * the page when the {@link PDFRenderingState} in the parse context holds
+     * exactly one.  Otherwise, if a renderer that handles PDF is configured and
+     * the OCR rendering strategy is ALL, that renderer is used; in every other
+     * case the page is rendered directly via
+     * {@link #noContextRenderCurrentPage(ParseContext, TemporaryResources)}.
+     *
+     * @param parseContext parse context that may carry a PDFRenderingState
+     * @param tmpResources temporary resources used when rendering directly
+     * @return the rendering of the current page
+     * @throws IOException on a problem during rendering
+     * @throws TikaException on a failure in the renderer
+     */
+    private RenderResult renderCurrentPage(ParseContext parseContext,
+                                           TemporaryResources tmpResources)
+            throws IOException, TikaException {
+        PDFRenderingState renderingState = parseContext.get(PDFRenderingState.class);
+        if (renderingState == null) {
+            //without a rendering state there is nothing to reuse and no stream
+            //to hand to a renderer; falling through would dereference null
+            return noContextRenderCurrentPage(parseContext, tmpResources);
+        }
+        //if the full document has already been rendered, then reuse that file
+        //TODO: we need to prevent this if only a portion of the page or portions
+        //of the page have been rendered.
+        //TODO: we should also figure out how to not reuse the rendering if
+        //the user wants to render twice (say, full color to display to users, but
+        //grayscale for (notionally?) better OCR).
+        PageBasedRenderResults results = (PageBasedRenderResults) renderingState.getRenderResults();
+        if (results != null) {
+            List<RenderResult> pageResults = results.getPage(getCurrentPageNo());
+            //guard against there being no entry for this page
+            if (pageResults != null && pageResults.size() == 1) {
+                return pageResults.get(0);
+            }
+        }
+        Renderer thisRenderer = getPDFRenderer(config.getRenderer());
+        //only delegate if there's a configured renderer and the rendering strategy is "all"
+        if (thisRenderer == null ||
+                config.getOcrRenderingStrategy() != PDFParserConfig.OCR_RENDERING_STRATEGY.ALL) {
+            return noContextRenderCurrentPage(parseContext, tmpResources);
+        }
+
+        PageRangeRequest pageRangeRequest =
+                new PageRangeRequest(getCurrentPageNo(), getCurrentPageNo());
+        Metadata m = new Metadata();
+        m.set(TikaCoreProperties.TYPE, PDFParser.MEDIA_TYPE.toString());
+
+        if (thisRenderer instanceof PDDocumentRenderer) {
+            //hand over the already opened PDDocument via an empty stream
+            try (TikaInputStream tis = TikaInputStream.get(new byte[0])) {
+                tis.setOpenContainer(pdDocument);
+                return thisRenderer.render(tis, m, parseContext, pageRangeRequest)
+                        .getResults().get(0);
+            }
+        }
+        //renderingState is known to be non-null here, so use its stream directly
+        return thisRenderer
+                .render(renderingState.getTikaInputStream(), m, parseContext, pageRangeRequest)
+                .getResults().get(0);
+    }
+
+    /**
+     * Picks the renderer to use for PDFs out of the configured renderer.
+     *
+     * @param renderer configured renderer, may be <code>null</code>
+     * @return the leaf renderer for PDF if given a {@link CompositeRenderer};
+     * the renderer itself if it supports PDF; otherwise <code>null</code>
+     */
+    private Renderer getPDFRenderer(Renderer renderer) {
+        if (renderer instanceof CompositeRenderer) {
+            CompositeRenderer composite = (CompositeRenderer) renderer;
+            return composite.getLeafRenderer(PDFParser.MEDIA_TYPE);
+        }
+        //instanceof is false for null, so a null renderer ends up here
+        boolean handlesPdf = renderer != null &&
+                renderer.getSupportedTypes(context).contains(PDFParser.MEDIA_TYPE);
+        return handlesPdf ? renderer : null;
+    }
+
+
+    private RenderResult noContextRenderCurrentPage(ParseContext parseContext,
+                                                    TemporaryResources tmpResources)
+            throws IOException, TikaException {
+        PDFRenderer renderer = null;
+        switch (config.getOcrRenderingStrategy()) {
+            case NO_TEXT:
+                renderer = new NoTextPDFRenderer(pdDocument);
+                break;
+            case TEXT_ONLY:
+                renderer = new TextOnlyPDFRenderer(pdDocument);
+                break;
+            case ALL:
+                renderer = new PDFRenderer(pdDocument);
+                break;
+        }
+
+        int dpi = config.getOcrDPI();
+        Path tmpFile = null;
+        Metadata m = new Metadata();
+        m.set(Rendering.PAGE_NUMBER, pageIndex + 1);
+
+        RenderingTracker renderingTracker = parseContext.get(RenderingTracker.class);
+        if (renderingTracker == null) {
+            renderingTracker = new RenderingTracker();
+            parseContext.set(RenderingTracker.class, renderingTracker);
+        }
+        int id = renderingTracker.getNextId();
+
+        try {
+            BufferedImage image =
+                    renderer.renderImageWithDPI(pageIndex, dpi, config.getOcrImageType());
+            tmpFile = tmpResources.createTempFile();
+            try (OutputStream os = Files.newOutputStream(tmpFile)) {
+                //TODO: get output format from TesseractConfig
+                ImageIOUtil.writeImage(image, config.getOcrImageFormatName(), os, dpi,
+                        config.getOcrImageQuality());
+            }
+        } catch (SecurityException e) {
+            //throw SecurityExceptions immediately
+            throw e;
+        } catch (IOException | RuntimeException e) {
+            //image rendering can throw a variety of runtime exceptions, not just
+            // IOExceptions...
+            //need to have a wide catch
+            metadata.add(TikaCoreProperties.TIKA_META_EXCEPTION_EMBEDDED_STREAM,
+                    ExceptionUtils.getStackTrace(e));
+
+            return new RenderResult(RenderResult.STATUS.EXCEPTION, id, null, m);
+        }
+        return new RenderResult(RenderResult.STATUS.SUCCESS, id, tmpFile, m);
+    }
+
     @Override
     protected void endPage(PDPage page) throws IOException {
         metadata.add(PDF.CHARACTERS_PER_PAGE, totalCharsPerPage);
@@ -547,7 +645,7 @@ class AbstractPDF2XHTML extends PDFTextStripper {
                     if (fann.getFile() instanceof PDComplexFileSpecification) {
                         handlePDComplexFileSpec(fann.getAttachmentName(),
                                 "annotationFileAttachment",
-                                (PDComplexFileSpecification)fann.getFile());
+                                (PDComplexFileSpecification) fann.getFile());
                     }
                 } else if (annotation instanceof PDAnnotationWidget) {
                     handleWidget((PDAnnotationWidget) annotation);
@@ -560,11 +658,9 @@ class AbstractPDF2XHTML extends PDFTextStripper {
                         //subtype is U3D or PRC or model/ (prefix for model mime type)
                         metadata.set(PDF.HAS_3D, true);
                     }
-                    for (COSDictionary fileSpec :
-                            findFileSpecs(annotation.getCOSObject())) {
+                    for (COSDictionary fileSpec : findFileSpecs(annotation.getCOSObject())) {
                         PDComplexFileSpecification cfs = new PDComplexFileSpecification(fileSpec);
-                        handlePDComplexFileSpec(cfs.getFilename(),
-                                annotationSubtype, cfs);
+                        handlePDComplexFileSpec(cfs.getFilename(), annotationSubtype, cfs);
                     }
                 }
                 // TODO: remove once PDFBOX-1143 is fixed:
@@ -619,13 +715,16 @@ class AbstractPDF2XHTML extends PDFTextStripper {
                 boolean unmappedExceedsLimit = false;
                 if (totalCharsPerPage > config.getOcrStrategyAuto().getTotalCharsPerPage()) {
                     // There are enough characters to not have to do OCR.  Check number of unmapped characters
-                    final float percentUnmapped = (float) unmappedUnicodeCharsPerPage / totalCharsPerPage;
-                    final float unmappedCharacterLimit = config.getOcrStrategyAuto().getUnmappedUnicodeCharsPerPage();
-                    unmappedExceedsLimit = (unmappedCharacterLimit < 1)
-                            ? percentUnmapped > unmappedCharacterLimit
-                            : unmappedUnicodeCharsPerPage > unmappedCharacterLimit;
+                    final float percentUnmapped =
+                            (float) unmappedUnicodeCharsPerPage / totalCharsPerPage;
+                    final float unmappedCharacterLimit =
+                            config.getOcrStrategyAuto().getUnmappedUnicodeCharsPerPage();
+                    unmappedExceedsLimit = (unmappedCharacterLimit < 1) ?
+                            percentUnmapped > unmappedCharacterLimit :
+                            unmappedUnicodeCharsPerPage > unmappedCharacterLimit;
                 }
-                if (totalCharsPerPage <= config.getOcrStrategyAuto().getTotalCharsPerPage() || unmappedExceedsLimit) {
+                if (totalCharsPerPage <= config.getOcrStrategyAuto().getTotalCharsPerPage() ||
+                        unmappedExceedsLimit) {
                     doOCROnCurrentPage(AUTO);
                 }
             }
@@ -664,14 +763,12 @@ class AbstractPDF2XHTML extends PDFTextStripper {
         return PDFDOMUtil.findType(cosDict, types, MAX_RECURSION_DEPTH);
     }
 
-    private void handlePDComplexFileSpec(String attachmentName,
-                                         String annotationType,
+    private void handlePDComplexFileSpec(String attachmentName, String annotationType,
                                          PDComplexFileSpecification fileSpec) throws IOException {
         try {
             AttributesImpl attributes = new AttributesImpl();
             attributes.addAttribute("", "source", "source", "CDATA", annotationType);
-            extractMultiOSPDEmbeddedFiles(attachmentName, fileSpec,
-                    attributes);
+            extractMultiOSPDEmbeddedFiles(attachmentName, fileSpec, attributes);
         } catch (SAXException e) {
             throw new IOException("file embedded in annotation sax exception", e);
         } catch (TikaException e) {
@@ -1130,8 +1227,7 @@ class AbstractPDF2XHTML extends PDFTextStripper {
     }
 
     enum ActionTrigger {
-        AFTER_DOCUMENT_PRINT, AFTER_DOCUMENT_SAVE, ANNOTATION_CURSOR_ENTERS,
-        ANNOTATION_CURSOR_EXIT,
+        AFTER_DOCUMENT_PRINT, AFTER_DOCUMENT_SAVE, ANNOTATION_CURSOR_ENTERS, ANNOTATION_CURSOR_EXIT,
         ANNOTATION_LOSE_INPUT_FOCUS, ANNOTATION_MOUSE_CLICK, ANNOTATION_MOUSE_RELEASED,
         ANNOTATION_PAGE_CLOSED, ANNOTATION_PAGE_NO_LONGER_VISIBLE, ANNOTATION_PAGE_OPENED,
         ANNOTATION_PAGE_VISIBLE, ANNOTATION_RECEIVES_FOCUS, ANNOTATION_WIDGET,
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/OCR2XHTML.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/OCR2XHTML.java
index 7493253bb..2658a484a 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/OCR2XHTML.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/OCR2XHTML.java
@@ -23,12 +23,12 @@ import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.text.PDFTextStripper;
 import org.apache.pdfbox.text.TextPosition;
-import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
 
 
 /**
@@ -37,9 +37,9 @@ import org.apache.tika.parser.ParseContext;
  */
 class OCR2XHTML extends AbstractPDF2XHTML {
 
-    private OCR2XHTML(PDDocument document, ContentHandler handler, ParseContext context,
+    private OCR2XHTML(PDDocument document, XHTMLContentHandler xhtml, ParseContext context,
                       Metadata metadata, PDFParserConfig config) throws IOException {
-        super(document, handler, context, metadata, config);
+        super(document, xhtml, context, metadata, config);
     }
 
     /**
@@ -47,17 +47,18 @@ class OCR2XHTML extends AbstractPDF2XHTML {
      * of XHTML SAX events sent to the given content handler.
      *
      * @param document PDF document
-     * @param handler  SAX content handler
+     * @param xhtml    XHTML content handler that receives the SAX events
      * @param metadata PDF metadata
      * @throws SAXException  if the content handler fails to process SAX events
      * @throws TikaException if there was an exception outside of per page processing
      */
-    public static void process(PDDocument document, ContentHandler handler, ParseContext context,
-                               Metadata metadata, PDFParserConfig config)
+    public static void process(PDDocument document, XHTMLContentHandler xhtml, ParseContext context,
+                               Metadata metadata,
+                               PDFParserConfig config)
             throws SAXException, TikaException {
         OCR2XHTML ocr2XHTML = null;
         try {
-            ocr2XHTML = new OCR2XHTML(document, handler, context, metadata, config);
+            ocr2XHTML = new OCR2XHTML(document, xhtml, context, metadata, config);
             ocr2XHTML.writeText(document, new Writer() {
                 @Override
                 public void write(char[] cbuf, int off, int len) {
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
index 93d1b7e81..602a8823e 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
@@ -34,12 +34,12 @@ import org.apache.pdfbox.pdmodel.PDPageContentStream;
 import org.apache.pdfbox.text.PDFTextStripper;
 import org.apache.pdfbox.text.TextPosition;
 import org.apache.pdfbox.util.Matrix;
-import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
 
 /**
  * Utility class that overrides the {@link PDFTextStripper} functionality
@@ -63,9 +63,9 @@ class PDF2XHTML extends AbstractPDF2XHTML {
     private Map<COSStream, Integer> processedInlineImages = new HashMap<>();
     private AtomicInteger inlineImageCounter = new AtomicInteger(0);
 
-    PDF2XHTML(PDDocument document, ContentHandler handler, ParseContext context, Metadata metadata,
+    PDF2XHTML(PDDocument document, XHTMLContentHandler xhtml, ParseContext context, Metadata metadata,
               PDFParserConfig config) throws IOException {
-        super(document, handler, context, metadata, config);
+        super(document, xhtml, context, metadata, config);
     }
 
     /**
@@ -73,12 +73,12 @@ class PDF2XHTML extends AbstractPDF2XHTML {
      * of XHTML SAX events sent to the given content handler.
      *
      * @param document PDF document
-     * @param handler  SAX content handler
+     * @param xhtml    XHTML content handler that receives the SAX events
      * @param metadata PDF metadata
      * @throws SAXException  if the content handler fails to process SAX events
      * @throws TikaException if there was an exception outside of per page processing
      */
-    public static void process(PDDocument document, ContentHandler handler, ParseContext context,
+    public static void process(PDDocument document, XHTMLContentHandler xhtml, ParseContext context,
                                Metadata metadata, PDFParserConfig config)
             throws SAXException, TikaException {
         PDF2XHTML pdf2XHTML = null;
@@ -88,9 +88,9 @@ class PDF2XHTML extends AbstractPDF2XHTML {
             // handler.
             if (config.isDetectAngles()) {
                 pdf2XHTML =
-                        new AngleDetectingPDF2XHTML(document, handler, context, metadata, config);
+                        new AngleDetectingPDF2XHTML(document, xhtml, context, metadata, config);
             } else {
-                pdf2XHTML = new PDF2XHTML(document, handler, context, metadata, config);
+                pdf2XHTML = new PDF2XHTML(document, xhtml, context, metadata, config);
             }
             config.configure(pdf2XHTML);
 
@@ -225,10 +225,10 @@ class PDF2XHTML extends AbstractPDF2XHTML {
 
     private static class AngleDetectingPDF2XHTML extends PDF2XHTML {
 
-        private AngleDetectingPDF2XHTML(PDDocument document, ContentHandler handler,
+        private AngleDetectingPDF2XHTML(PDDocument document, XHTMLContentHandler xhtml,
                                         ParseContext context, Metadata metadata,
                                         PDFParserConfig config) throws IOException {
-            super(document, handler, context, metadata, config);
+            super(document, xhtml, context, metadata, config);
         }
 
         @Override
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFMarkedContent2XHTML.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFMarkedContent2XHTML.java
index 5ddf581d9..3e4e1bf64 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFMarkedContent2XHTML.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFMarkedContent2XHTML.java
@@ -41,12 +41,12 @@ import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructur
 import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
 import org.apache.pdfbox.text.PDFMarkedContentExtractor;
 import org.apache.pdfbox.text.TextPosition;
-import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
 
 /**
  * <p>This was added in Tika 1.24 as an alpha version of a text extractor
@@ -88,10 +88,10 @@ public class PDFMarkedContent2XHTML extends PDF2XHTML {
     //this stores state as we recurse through the structure tag tree
     private State state = new State();
 
-    private PDFMarkedContent2XHTML(PDDocument document, ContentHandler handler,
+    private PDFMarkedContent2XHTML(PDDocument document, XHTMLContentHandler xhtml,
                                    ParseContext context, Metadata metadata, PDFParserConfig config)
             throws IOException {
-        super(document, handler, context, metadata, config);
+        super(document, xhtml, context, metadata, config);
     }
 
     /**
@@ -99,19 +99,20 @@ public class PDFMarkedContent2XHTML extends PDF2XHTML {
      * of XHTML SAX events sent to the given content handler.
      *
      * @param pdDocument PDF document
-     * @param handler    SAX content handler
+     * @param xhtml      XHTML content handler that receives the SAX events
      * @param metadata   PDF metadata
      * @throws SAXException  if the content handler fails to process SAX events
      * @throws TikaException if there was an exception outside of per page processing
      */
-    public static void process(PDDocument pdDocument, ContentHandler handler, ParseContext context,
+    public static void process(PDDocument pdDocument, XHTMLContentHandler xhtml,
+                               ParseContext context,
                                Metadata metadata, PDFParserConfig config)
             throws SAXException, TikaException {
 
         PDFMarkedContent2XHTML pdfMarkedContent2XHTML = null;
         try {
             pdfMarkedContent2XHTML =
-                    new PDFMarkedContent2XHTML(pdDocument, handler, context, metadata, config);
+                    new PDFMarkedContent2XHTML(pdDocument, xhtml, context, metadata, config);
         } catch (IOException e) {
             throw new TikaException("couldn't initialize PDFMarkedContent2XHTML", e);
         }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
index 3835179b1..28f796157 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
@@ -54,6 +54,7 @@ import org.apache.tika.exception.EncryptedDocumentException;
 import org.apache.tika.exception.TikaConfigException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.extractor.EmbeddedDocumentUtil;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.AccessPermissions;
 import org.apache.tika.metadata.Metadata;
@@ -64,6 +65,13 @@ import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.AbstractParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.PasswordProvider;
+import org.apache.tika.parser.RenderingParser;
+import org.apache.tika.renderer.PageRangeRequest;
+import org.apache.tika.renderer.RenderResult;
+import org.apache.tika.renderer.RenderResults;
+import org.apache.tika.renderer.Renderer;
+import org.apache.tika.renderer.pdf.PDFBoxRenderer;
+import org.apache.tika.renderer.pdf.PDFRenderingState;
 import org.apache.tika.sax.XHTMLContentHandler;
 
 /**
@@ -96,7 +104,7 @@ import org.apache.tika.sax.XHTMLContentHandler;
  * If your PDFs contain marked content or tags, consider
  * {@link PDFParserConfig#setExtractMarkedContent(boolean)}
  */
-public class PDFParser extends AbstractParser implements Initializable {
+public class PDFParser extends AbstractParser implements RenderingParser, Initializable {
 
     /**
      * Metadata key for giving the document password to the parser.
@@ -105,7 +113,7 @@ public class PDFParser extends AbstractParser implements Initializable {
      * @deprecated Supply a {@link PasswordProvider} on the {@link ParseContext} instead
      */
     public static final String PASSWORD = "org.apache.tika.parser.pdf.password";
-    private static final MediaType MEDIA_TYPE = MediaType.application("pdf");
+    protected static final MediaType MEDIA_TYPE = MediaType.application("pdf");
     /**
      * Serial version UID
      */
@@ -128,12 +136,20 @@ public class PDFParser extends AbstractParser implements Initializable {
         if (localConfig.isSetKCMS()) {
             System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider");
         }
-
+        initRenderer(localConfig);
         PDDocument pdfDocument = null;
 
         String password = "";
+        PDFRenderingState incomingRenderingState = context.get(PDFRenderingState.class);
         try {
-            TikaInputStream tstream = TikaInputStream.cast(stream);
+            TikaInputStream tstream;
+            if (shouldSpool(localConfig)) {
+                tstream = TikaInputStream.get(stream);
+                tstream.getPath();
+                context.set(PDFRenderingState.class, new PDFRenderingState(tstream));
+            } else {
+                tstream = TikaInputStream.cast(stream);
+            }
             password = getPassword(metadata, context);
             MemoryUsageSetting memoryUsageSetting = MemoryUsageSetting.setupMainMemoryOnly();
             if (localConfig.getMaxMainMemoryBytes() >= 0) {
@@ -149,41 +165,104 @@ public class PDFParser extends AbstractParser implements Initializable {
                 pdfDocument = getPDDocument(new CloseShieldInputStream(stream), password,
                         memoryUsageSetting, metadata, context);
             }
-            metadata.set(PDF.IS_ENCRYPTED, Boolean.toString(pdfDocument.isEncrypted()));
-
-            metadata.set(Metadata.CONTENT_TYPE, MEDIA_TYPE.toString());
+            boolean hasXFA = hasXFA(pdfDocument, metadata);
+            boolean hasMarkedContent = hasMarkedContent(pdfDocument, metadata);
             extractMetadata(pdfDocument, metadata, context);
             AccessChecker checker = localConfig.getAccessChecker();
             checker.check(metadata);
+            XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+            tstream.setOpenContainer(pdfDocument);
+            handleRendering(pdfDocument, tstream, xhtml, metadata, context, localConfig);
             if (handler != null) {
-                boolean hasXFA = hasXFA(pdfDocument);
-                metadata.set(PDF.HAS_XFA, Boolean.toString(hasXFA));
-                boolean hasMarkedContent = hasMarkedContent(pdfDocument);
-                metadata.set(PDF.HAS_MARKED_CONTENT, Boolean.toString(hasMarkedContent));
-                boolean hasCollection = hasCollection(pdfDocument);
-                metadata.set(PDF.HAS_COLLECTION, Boolean.toString(hasCollection));
                 if (shouldHandleXFAOnly(hasXFA, localConfig)) {
-                    handleXFAOnly(pdfDocument, handler, metadata, context);
+                    handleXFAOnly(pdfDocument, xhtml, metadata, context);
                 } else if (localConfig.getOcrStrategy()
                         .equals(PDFParserConfig.OCR_STRATEGY.OCR_ONLY)) {
-                    OCR2XHTML.process(pdfDocument, handler, context, metadata, localConfig);
+                    OCR2XHTML.process(pdfDocument, xhtml, context, metadata, localConfig);
                 } else if (hasMarkedContent && localConfig.isExtractMarkedContent()) {
                     PDFMarkedContent2XHTML
-                            .process(pdfDocument, handler, context, metadata, localConfig);
+                            .process(pdfDocument, xhtml, context, metadata,
+                                    localConfig);
                 } else {
-                    PDF2XHTML.process(pdfDocument, handler, context, metadata, localConfig);
+                    PDF2XHTML.process(pdfDocument, xhtml, context, metadata,
+                            localConfig);
                 }
             }
         } catch (InvalidPasswordException e) {
             metadata.set(PDF.IS_ENCRYPTED, "true");
             throw new EncryptedDocumentException(e);
         } finally {
-            if (pdfDocument != null) {
-                pdfDocument.close();
+            PDFRenderingState currState = context.get(PDFRenderingState.class);
+            try {
+                if (currState != null && currState.getRenderResults() != null) {
+                    currState.getRenderResults().close();
+                }
+                if (pdfDocument != null) {
+                    pdfDocument.close();
+                }
+            } finally {
+                //replace the one that was here
+                context.set(PDFRenderingState.class, incomingRenderingState);
             }
         }
     }
 
+    private boolean shouldSpool(PDFParserConfig localConfig) {
+        //rendering whole pages always needs the spooled stream
+        boolean renderPages =
+                localConfig.getImageStrategy() == PDFParserConfig.IMAGE_STRATEGY.RENDERED_PAGES;
+        //so does every OCR strategy except NO_OCR
+        //TODO: test that this is not AUTO with no OCR parser installed
+        boolean ocrPossible =
+                localConfig.getOcrStrategy() != PDFParserConfig.OCR_STRATEGY.NO_OCR;
+        return renderPages || ocrPossible;
+    }
+
+    private void handleRendering(PDDocument pdDocument, TikaInputStream tstream,
+                                 ContentHandler xhtml, Metadata parentMetadata,
+                                 ParseContext context, PDFParserConfig config) {
+        //pages are only rendered under the RENDERED_PAGES image strategy
+        if (config.getImageStrategy() != PDFParserConfig.IMAGE_STRATEGY.RENDERED_PAGES) {
+            return;
+        }
+        RenderResults rendered;
+        try {
+            rendered = renderPDF(tstream, context, config);
+        } catch (SecurityException e) {
+            throw e;
+        } catch (Exception e) {
+            //a failed rendering is recorded in the parent metadata instead of being rethrown
+            EmbeddedDocumentUtil.recordException(e, parentMetadata);
+            return;
+        }
+        context.get(PDFRenderingState.class).setRenderResults(rendered);
+        EmbeddedDocumentExtractor extractor =
+                EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context);
+        for (RenderResult rendering : rendered.getResults()) {
+            //skip unsuccessful renderings and those the extractor declines to parse
+            if (rendering.getStatus() != RenderResult.STATUS.SUCCESS ||
+                    !extractor.shouldParseEmbedded(rendering.getMetadata())) {
+                continue;
+            }
+            try (InputStream is = TikaInputStream.get(rendering.getPath())) {
+                extractor.parseEmbedded(is, xhtml, rendering.getMetadata(), false);
+            } catch (SecurityException e) {
+                throw e;
+            } catch (Exception e) {
+                EmbeddedDocumentUtil.recordException(e, parentMetadata);
+            }
+        }
+    }
+
+    private RenderResults renderPDF(TikaInputStream tstream, ParseContext parseContext,
+            PDFParserConfig localConfig) throws IOException, TikaException {
+        //the renderer is handed fresh metadata that carries only the PDF media type
+        Metadata typeHint = new Metadata();
+        typeHint.set(TikaCoreProperties.TYPE, MEDIA_TYPE.toString());
+        Renderer renderer = localConfig.getRenderer();
+        return renderer.render(tstream, typeHint, parseContext, PageRangeRequest.RENDER_ALL);
+    }
+
 
     protected PDDocument getPDDocument(InputStream inputStream, String password,
                                        MemoryUsageSetting memoryUsageSetting, Metadata metadata,
@@ -197,7 +276,14 @@ public class PDFParser extends AbstractParser implements Initializable {
         return PDDocument.load(path.toFile(), password, memoryUsageSetting);
     }
 
+    private boolean hasMarkedContent(PDDocument pdDocument, Metadata metadata) {
+        boolean hasMarkedContent = hasMarkedContent(pdDocument);
+        metadata.set(PDF.HAS_MARKED_CONTENT, hasMarkedContent);
+        return hasMarkedContent;
+    }
+
     private boolean hasMarkedContent(PDDocument pdDocument) {
+        boolean hasMarkedContent;
         PDStructureTreeRoot root = pdDocument.getDocumentCatalog().getStructureTreeRoot();
         if (root == null) {
             return false;
@@ -219,6 +305,12 @@ public class PDFParser extends AbstractParser implements Initializable {
         return false;
     }
 
+    private boolean hasCollection(PDDocument pdDocument, Metadata metadata) {
+        boolean hasCollection = hasCollection(pdDocument);
+        metadata.set(PDF.HAS_COLLECTION, hasCollection);
+        return hasCollection;
+    }
+
     private boolean hasCollection(PDDocument pdfDocument) {
         COSDictionary cosDict = pdfDocument.getDocumentCatalog().getCOSObject();
         if (cosDict.containsKey(COSName.COLLECTION)) {
@@ -251,6 +343,7 @@ public class PDFParser extends AbstractParser implements Initializable {
 
     private void extractMetadata(PDDocument document, Metadata metadata, ParseContext context)
             throws TikaException {
+        metadata.set(Metadata.CONTENT_TYPE, MEDIA_TYPE.toString());
 
         //first extract AccessPermissions
         AccessPermission ap = document.getCurrentAccessPermission();
@@ -265,6 +358,8 @@ public class PDFParser extends AbstractParser implements Initializable {
                 Boolean.toString(ap.canModifyAnnotations()));
         metadata.set(AccessPermissions.CAN_PRINT, Boolean.toString(ap.canPrint()));
         metadata.set(AccessPermissions.CAN_PRINT_DEGRADED, Boolean.toString(ap.canPrintDegraded()));
+        hasCollection(document, metadata);
+        metadata.set(PDF.IS_ENCRYPTED, Boolean.toString(document.isEncrypted()));
 
         if (document.getDocumentCatalog().getLanguage() != null) {
             metadata.set(TikaCoreProperties.LANGUAGE, document.getDocumentCatalog().getLanguage());
@@ -371,21 +466,22 @@ public class PDFParser extends AbstractParser implements Initializable {
     }
 
 
-    private boolean hasXFA(PDDocument pdDocument) {
-        return pdDocument.getDocumentCatalog() != null &&
+    private boolean hasXFA(PDDocument pdDocument, Metadata metadata) {
+        boolean hasXFA = pdDocument.getDocumentCatalog() != null &&
                 pdDocument.getDocumentCatalog().getAcroForm(null) != null &&
                 pdDocument.getDocumentCatalog().getAcroForm(null).hasXFA();
+        metadata.set(PDF.HAS_XFA, Boolean.toString(hasXFA));
+        return hasXFA;
     }
 
     private boolean shouldHandleXFAOnly(boolean hasXFA, PDFParserConfig config) {
         return config.isIfXFAExtractOnlyXFA() && hasXFA;
     }
 
-    private void handleXFAOnly(PDDocument pdDocument, ContentHandler handler, Metadata metadata,
+    private void handleXFAOnly(PDDocument pdDocument, XHTMLContentHandler xhtml, Metadata metadata,
                                ParseContext context)
             throws SAXException, IOException, TikaException {
         XFAExtractor ex = new XFAExtractor();
-        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
         xhtml.startDocument();
         try (InputStream is = new ByteArrayInputStream(
                 pdDocument.getDocumentCatalog().getAcroForm(null).getXFA().getBytes())) {
@@ -622,6 +718,27 @@ public class PDFParser extends AbstractParser implements Initializable {
         //no-op
     }
 
+    private void initRenderer(PDFParserConfig config) {
+        if (config.getRenderer() != null) {
+            return;
+        }
+        //nothing was defined: default to PDFBox, using this config's own OCR image settings
+        PDFBoxRenderer pdfBoxRenderer = new PDFBoxRenderer();
+        pdfBoxRenderer.setDPI(config.getOcrDPI());
+        pdfBoxRenderer.setImageType(config.getOcrImageType());
+        pdfBoxRenderer.setImageFormatName(config.getOcrImageFormatName());
+        config.setRenderer(pdfBoxRenderer);
+    }
+
+    @Override
+    public void setRenderer(Renderer renderer) {
+        defaultConfig.setRenderer(renderer);
+    }
+
+    public void setImageStrategy(String imageStrategy) {
+        defaultConfig.setImageStrategy(imageStrategy);
+    }
+
     /**
      * Copied from AcroformDefaultFixup minus generation of appearances and handling of orphan
      * widgets, which we don't need.
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
index af0e9617b..fb8a315ae 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
@@ -21,6 +21,7 @@ import java.lang.reflect.Field;
 import java.lang.reflect.Modifier;
 import java.util.HashSet;
 import java.util.Locale;
+import java.util.Objects;
 import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -29,6 +30,7 @@ import org.apache.pdfbox.rendering.ImageType;
 import org.apache.pdfbox.text.PDFTextStripper;
 
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.renderer.Renderer;
 
 /**
  * Config for PDFParser.
@@ -112,6 +114,10 @@ public class PDFParserConfig implements Serializable {
     private String ocrImageFormatName = "png";
     private float ocrImageQuality = 1.0f;
 
+    /**
+     * How images are handled; RENDERED_PAGES renders the entire document.
+     */
+    private IMAGE_STRATEGY imageStrategy = IMAGE_STRATEGY.NONE;
     private AccessChecker accessChecker = new AccessChecker();
 
     //The PDFParser can throw IOExceptions if there is a problem
@@ -130,6 +136,8 @@ public class PDFParserConfig implements Serializable {
 
     private boolean detectAngles = false;
 
+    private Renderer renderer;
+
     /**
      * @return whether or not to extract only inline image metadata and not render the images
      */
@@ -791,114 +799,76 @@ public class PDFParserConfig implements Serializable {
         if (this == o) {
             return true;
         }
-        if (!(o instanceof PDFParserConfig)) {
+        if (o == null || getClass() != o.getClass()) {
             return false;
         }
-
         PDFParserConfig config = (PDFParserConfig) o;
-
-        if (isEnableAutoSpace() != config.isEnableAutoSpace()) {
-            return false;
-        }
-        if (isSuppressDuplicateOverlappingText() != config.isSuppressDuplicateOverlappingText()) {
-            return false;
-        }
-        if (isExtractAnnotationText() != config.isExtractAnnotationText()) {
-            return false;
-        }
-        if (isSortByPosition() != config.isSortByPosition()) {
-            return false;
-        }
-        if (isExtractAcroFormContent() != config.isExtractAcroFormContent()) {
-            return false;
-        }
-        if (isExtractBookmarksText() != config.isExtractBookmarksText()) {
-            return false;
-        }
-        if (isExtractInlineImages() != config.isExtractInlineImages()) {
-            return false;
-        }
-        if (isExtractUniqueInlineImagesOnly() != config.isExtractUniqueInlineImagesOnly()) {
-            return false;
-        }
-        if (isIfXFAExtractOnlyXFA() != config.isIfXFAExtractOnlyXFA()) {
-            return false;
-        }
-        if (getOcrDPI() != config.getOcrDPI()) {
-            return false;
-        }
-        if (isCatchIntermediateIOExceptions() != config.isCatchIntermediateIOExceptions()) {
-            return false;
-        }
-        if (!getAverageCharTolerance().equals(config.getAverageCharTolerance())) {
-            return false;
-        }
-        if (!getSpacingTolerance().equals(config.getSpacingTolerance())) {
-            return false;
-        }
-        if (!getDropThreshold().equals(config.getDropThreshold())) {
-            return false;
-        }
-        if (!getOcrStrategy().equals(config.getOcrStrategy())) {
-            return false;
-        }
-        if (getOcrImageType() != config.getOcrImageType()) {
-            return false;
-        }
-        if (!getOcrImageFormatName().equals(config.getOcrImageFormatName())) {
-            return false;
-        }
-        if (isExtractActions() != config.isExtractActions()) {
-            return false;
-        }
-        if (!getAccessChecker().equals(config.getAccessChecker())) {
-            return false;
-        }
-        return getMaxMainMemoryBytes() == config.getMaxMainMemoryBytes();
+        return enableAutoSpace == config.enableAutoSpace &&
+                suppressDuplicateOverlappingText == config.suppressDuplicateOverlappingText &&
+                extractAnnotationText == config.extractAnnotationText &&
+                sortByPosition == config.sortByPosition &&
+                extractAcroFormContent == config.extractAcroFormContent &&
+                extractBookmarksText == config.extractBookmarksText &&
+                extractInlineImages == config.extractInlineImages &&
+                extractInlineImageMetadataOnly == config.extractInlineImageMetadataOnly &&
+                extractUniqueInlineImagesOnly == config.extractUniqueInlineImagesOnly &&
+                extractMarkedContent == config.extractMarkedContent &&
+                Float.compare(config.dropThreshold, dropThreshold) == 0 &&
+                ifXFAExtractOnlyXFA == config.ifXFAExtractOnlyXFA && ocrDPI == config.ocrDPI &&
+                Float.compare(config.ocrImageQuality, ocrImageQuality) == 0 &&
+                catchIntermediateIOExceptions == config.catchIntermediateIOExceptions &&
+                extractActions == config.extractActions &&
+                extractFontNames == config.extractFontNames &&
+                maxMainMemoryBytes == config.maxMainMemoryBytes && setKCMS == config.setKCMS &&
+                detectAngles == config.detectAngles &&
+                Objects.equals(userConfigured, config.userConfigured) &&
+                Objects.equals(averageCharTolerance, config.averageCharTolerance) &&
+                Objects.equals(spacingTolerance, config.spacingTolerance) &&
+                ocrStrategy == config.ocrStrategy &&
+                Objects.equals(ocrStrategyAuto, config.ocrStrategyAuto) &&
+                ocrRenderingStrategy == config.ocrRenderingStrategy &&
+                ocrImageType == config.ocrImageType &&
+                Objects.equals(ocrImageFormatName, config.ocrImageFormatName) &&
+                imageStrategy == config.imageStrategy &&
+                Objects.equals(accessChecker, config.accessChecker) &&
+                Objects.equals(renderer, config.renderer);
     }
 
     @Override
     public int hashCode() {
-        int result = (isEnableAutoSpace() ? 1 : 0);
-        result = 31 * result + (isSuppressDuplicateOverlappingText() ? 1 : 0);
-        result = 31 * result + (isExtractAnnotationText() ? 1 : 0);
-        result = 31 * result + (isSortByPosition() ? 1 : 0);
-        result = 31 * result + (isExtractAcroFormContent() ? 1 : 0);
-        result = 31 * result + (isExtractBookmarksText() ? 1 : 0);
-        result = 31 * result + (isExtractInlineImages() ? 1 : 0);
-        result = 31 * result + (isExtractUniqueInlineImagesOnly() ? 1 : 0);
-        result = 31 * result + getAverageCharTolerance().hashCode();
-        result = 31 * result + getSpacingTolerance().hashCode();
-        result = 31 * result + getDropThreshold().hashCode();
-        result = 31 * result + (isIfXFAExtractOnlyXFA() ? 1 : 0);
-        result = 31 * result + ocrStrategy.hashCode();
-        result = 31 * result + getOcrDPI();
-        result = 31 * result + getOcrImageType().hashCode();
-        result = 31 * result + getOcrImageFormatName().hashCode();
-        result = 31 * result + getAccessChecker().hashCode();
-        result = 31 * result + (isCatchIntermediateIOExceptions() ? 1 : 0);
-        result = 31 * result + (isExtractActions() ? 1 : 0);
-        result = 31 * result + Long.valueOf(getMaxMainMemoryBytes()).hashCode();
-        return result;
+        return Objects.hash(userConfigured, enableAutoSpace, suppressDuplicateOverlappingText,
+                extractAnnotationText, sortByPosition, extractAcroFormContent, extractBookmarksText,
+                extractInlineImages, extractInlineImageMetadataOnly, extractUniqueInlineImagesOnly,
+                extractMarkedContent, averageCharTolerance, spacingTolerance, dropThreshold,
+                ifXFAExtractOnlyXFA, ocrStrategy, ocrStrategyAuto, ocrRenderingStrategy, ocrDPI,
+                ocrImageType, ocrImageFormatName, ocrImageQuality, imageStrategy, accessChecker,
+                catchIntermediateIOExceptions, extractActions, extractFontNames, maxMainMemoryBytes,
+                setKCMS, detectAngles, renderer);
     }
 
-    @Override
-    public String toString() {
-        return "PDFParserConfig{" + "enableAutoSpace=" + enableAutoSpace +
-                ", suppressDuplicateOverlappingText=" + suppressDuplicateOverlappingText +
-                ", extractAnnotationText=" + extractAnnotationText + ", sortByPosition=" +
-                sortByPosition + ", extractAcroFormContent=" + extractAcroFormContent +
-                ", extractBookmarksText=" + extractBookmarksText + ", extractInlineImages=" +
-                extractInlineImages + ", extractUniqueInlineImagesOnly=" +
-                extractUniqueInlineImagesOnly + ", averageCharTolerance=" + averageCharTolerance +
-                ", spacingTolerance=" + spacingTolerance + ", dropThreshold=" + dropThreshold +
-                ", ifXFAExtractOnlyXFA=" + ifXFAExtractOnlyXFA + ", ocrStrategy=" + ocrStrategy +
-                ", ocrDPI=" + ocrDPI + ", ocrImageType=" + ocrImageType + ", ocrImageFormatName='" +
-                ocrImageFormatName + '\'' + ", accessChecker=" + accessChecker +
-                ", extractActions=" + extractActions + ", catchIntermediateIOExceptions=" +
-                catchIntermediateIOExceptions + ", maxMainMemoryBytes=" + maxMainMemoryBytes + '}';
+    public void setRenderer(Renderer renderer) {
+        this.renderer = renderer;
     }
 
+    public Renderer getRenderer() {
+        return renderer;
+    }
+
+    public void setImageStrategy(String imageStrategy) {
+        setImageStrategy(PDFParserConfig.IMAGE_STRATEGY.parse(imageStrategy));
+    }
+
+    public void setImageStrategy(IMAGE_STRATEGY imageStrategy) {
+        this.imageStrategy = imageStrategy;
+        userConfigured.add("imageStrategy");
+    }
+
+    public IMAGE_STRATEGY getImageStrategy() {
+        return imageStrategy;
+    }
+
+
+
     public enum OCR_STRATEGY {
         AUTO, NO_OCR, OCR_ONLY, OCR_AND_TEXT_EXTRACTION;
 
@@ -960,18 +930,22 @@ public class PDFParserConfig implements Serializable {
     }
 
     public enum OCR_RENDERING_STRATEGY {
-        NO_TEXT, ALL; //AUTO?
-        // Would TEXT_ONLY be useful in instances where the unicode mappings
-        // are corrupt/non-existent?
+        NO_TEXT, TEXT_ONLY, ALL; //AUTO?
 
         private static OCR_RENDERING_STRATEGY parse(String s) {
             if (s == null) {
-                return NO_TEXT;
-            } else if ("no_text".equals(s.toLowerCase(Locale.ROOT))) {
-                return NO_TEXT;
-            } else if ("all".equals(s.toLowerCase(Locale.ROOT))) {
                 return ALL;
             }
+            String lc = s.toLowerCase(Locale.US);
+            switch (lc) {
+                case "text_only":
+                    return TEXT_ONLY;
+                case "no_text":
+                    return NO_TEXT;
+                case "all":
+                    return ALL;
+            }
+
             StringBuilder sb = new StringBuilder();
             sb.append("I regret that I don't recognize '").append(s);
             sb.append("' as an OCR_STRATEGY. I only recognize:");
@@ -986,4 +960,34 @@ public class PDFParserConfig implements Serializable {
             throw new IllegalArgumentException(sb.toString());
         }
     }
+
+    /**
+     * Strategy for handling images in a PDF.
+     */
+    public enum IMAGE_STRATEGY {
+        NONE, RAW_IMAGES, RENDERED_PAGES;//TODO: add LOGICAL_IMAGES
+
+        /**
+         * Parses a strategy name, ignoring case.  Both the camelCase spellings
+         * ("rawImages", "renderedPages") and the spellings of the enum
+         * constants ("RAW_IMAGES", "RENDERED_PAGES") are accepted.
+         *
+         * @param s the strategy name
+         * @return the matching strategy
+         * @throws IllegalArgumentException if <code>s</code> is <code>null</code>
+         *                                  or is not a recognized name
+         */
+        private static IMAGE_STRATEGY parse(String s) {
+            if (s != null) {
+                //the input is lower-cased, so every label below must be lower case
+                switch (s.toLowerCase(Locale.US)) {
+                    case "rawimages":
+                    case "raw_images":
+                        return RAW_IMAGES;
+                    case "renderedpages":
+                    case "rendered_pages":
+                        return RENDERED_PAGES;
+                    case "none":
+                        return NONE;
+                    default:
+                        //fall through to exception
+                        break;
+                }
+            }
+            StringBuilder sb = new StringBuilder();
+            sb.append("I regret that I don't recognize '").append(s);
+            sb.append("' as an IMAGE_STRATEGY. I only recognize:");
+            int i = 0;
+            for (IMAGE_STRATEGY strategy : IMAGE_STRATEGY.values()) {
+                if (i++ > 0) {
+                    sb.append(", ");
+                }
+                sb.append(strategy.toString());
+            }
+            throw new IllegalArgumentException(sb.toString());
+        }
+    }
 }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/TextOnlyPDFRenderer.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/TextOnlyPDFRenderer.java
new file mode 100644
index 000000000..f282d124c
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/TextOnlyPDFRenderer.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.pdf;
+
+import java.awt.Graphics2D;
+import java.awt.geom.Point2D;
+import java.io.IOException;
+
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.graphics.image.PDImage;
+import org.apache.pdfbox.rendering.PDFRenderer;
+import org.apache.pdfbox.rendering.PageDrawer;
+import org.apache.pdfbox.rendering.PageDrawerParameters;
+
+/**
+ * This class extends the PDFRenderer to render only the textual
+ * elements
+ */
+public class TextOnlyPDFRenderer extends PDFRenderer {
+
+    public TextOnlyPDFRenderer(PDDocument document) {
+        super(document);
+    }
+
+    /**
+     * Returns a {@link PageDrawer} whose path, clipping, image and shading
+     * callbacks do nothing.  This renderer's annotation filter is applied
+     * to the drawer before it is returned.
+     *
+     * @param parameters the parameters to pass on to the drawer
+     * @return a new drawer
+     * @throws IOException if the drawer cannot be created
+     */
+    @Override
+    protected PageDrawer createPageDrawer(PageDrawerParameters parameters) throws IOException {
+        PageDrawer pageDrawer = new TextOnlyPageDrawer(parameters);
+        pageDrawer.setAnnotationFilter(getAnnotationsFilter());
+        return pageDrawer;
+    }
+
+    /**
+     * A {@link PageDrawer} that overrides the path construction, path painting,
+     * clipping, image and shading callbacks with empty bodies.  The class is
+     * static because it does not use any state of the enclosing renderer.
+     */
+    private static class TextOnlyPageDrawer extends PageDrawer {
+        public TextOnlyPageDrawer(PageDrawerParameters parameters) throws IOException {
+            super(parameters);
+        }
+
+        @Override
+        protected void transferClip(Graphics2D graphics) {
+            //intentionally empty
+        }
+
+        @Override
+        public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) {
+            //intentionally empty
+        }
+
+        @Override
+        public void strokePath() throws IOException {
+            //intentionally empty
+        }
+
+        @Override
+        public void fillPath(int windingRule) throws IOException {
+            //intentionally empty
+        }
+
+        @Override
+        public void fillAndStrokePath(int windingRule) throws IOException {
+            //intentionally empty
+        }
+
+        @Override
+        public void clip(int windingRule) {
+            //intentionally empty
+        }
+
+        @Override
+        public void lineTo(float x, float y) {
+            //intentionally empty
+        }
+
+        @Override
+        public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) {
+            //intentionally empty
+        }
+
+        @Override
+        public void closePath() {
+            //intentionally empty
+        }
+
+        @Override
+        public void endPath() {
+            //intentionally empty
+        }
+
+        @Override
+        public void drawImage(PDImage pdImage) throws IOException {
+            //intentionally empty
+        }
+
+        @Override
+        public void shadingFill(COSName shadingName) throws IOException {
+            //intentionally empty
+        }
+    }
+}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/MuPDFRenderer.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/MuPDFRenderer.java
new file mode 100644
index 000000000..983934677
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/MuPDFRenderer.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.renderer.pdf;
+
+import java.io.Closeable;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Rendering;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.renderer.PageBasedRenderResults;
+import org.apache.tika.renderer.PageRangeRequest;
+import org.apache.tika.renderer.RenderRequest;
+import org.apache.tika.renderer.RenderResult;
+import org.apache.tika.renderer.RenderResults;
+import org.apache.tika.renderer.Renderer;
+import org.apache.tika.renderer.RenderingTracker;
+import org.apache.tika.utils.FileProcessResult;
+import org.apache.tika.utils.ProcessUtils;
+
+public class MuPDFRenderer implements Renderer {
+
+    Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.application("pdf"));
+
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    @Override
+    public RenderResults render(InputStream is, Metadata metadata, ParseContext parseContext,
+                                RenderRequest... requests) throws IOException, TikaException {
+        TemporaryResources tmp = new TemporaryResources();
+        PageBasedRenderResults results = new PageBasedRenderResults(tmp);
+        Path path = TikaInputStream.get(is, tmp).getPath();
+        for (RenderRequest request : requests) {
+            renderRequest(path, metadata, parseContext, request, results, tmp);
+        }
+        return results;
+    }
+
+    private RenderResults renderRequest(Path pdf, Metadata metadata, ParseContext parseContext,
+                                        RenderRequest request, RenderResults results,
+                                        TemporaryResources tmp) throws TikaException, IOException {
+        if (! (request instanceof PageRangeRequest)) {
+            throw new TikaException("I regret that this renderer can only handle " +
+                    "PageRangeRequests, not " + request.getClass());
+        }
+        PageRangeRequest rangeRequest = (PageRangeRequest)request;
+        RenderingTracker tracker = parseContext.get(RenderingTracker.class);
+        if (tracker == null) {
+            tracker = new RenderingTracker();
+            parseContext.set(RenderingTracker.class, tracker);
+        }
+
+        Path dir = Files.createTempDirectory("tika-render-");
+        //TODO -- this assumes files have been deleted first
+        //do something smarter
+        tmp.addResource(new Closeable() {
+            @Override
+            public void close() throws IOException {
+                Files.delete(dir);
+            }
+        });
+        //TODO -- run mutool pages to get page sizes
+        //and then use that information in the -O to get proper scaling
+        //etc.
+        // This would also allow us to run on a single page at a time if that's of any interest
+        String[] args = createCommandLine(pdf, dir, rangeRequest);
+
+        ProcessBuilder builder = new ProcessBuilder();
+        builder.command(args);
+        //TODO: parameterize timeout
+        FileProcessResult result = ProcessUtils.execute(builder, 60000, 10, 1000);
+        if (result.getExitValue() != 0) {
+            throw new TikaException(result.getStderr());
+        }
+        //TODO -- fix this
+        Matcher m = Pattern.compile("tika-mutool-render-(\\d+)\\.png").matcher("");
+        for (File f : dir.toFile().listFiles()) {
+            String n = f.getName();
+            if (m.reset(n).find()) {
+                int pageIndex = Integer.parseInt(m.group(1));
+                Metadata renderMetadata = new Metadata();
+                renderMetadata.set(Rendering.PAGE_NUMBER, pageIndex);
+                renderMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
+                        TikaCoreProperties.EmbeddedResourceType.RENDERING.name());
+                results.add(new RenderResult(RenderResult.STATUS.SUCCESS, tracker.getNextId(),
+                        f.toPath(), renderMetadata));
+            }
+        }
+
+        return results;
+    }
+
+    private String[] createCommandLine(Path pdf, Path dir, PageRangeRequest request) {
+        //TODO parameterize all the things; mutool path, colorspace and size and format and...
+        List<String> args = new ArrayList<>();
+        args.add("mutool");
+        args.add("convert");
+        args.add("-O colorspace=gray");
+        args.add("-o");
+        args.add(
+                ProcessUtils.escapeCommandLine(
+                        dir.toAbsolutePath().toString() + "/" + "tika-mutool-render-%d.png"));
+        args.add(ProcessUtils.escapeCommandLine(pdf.toAbsolutePath().toString()));
+        if (request != PageRangeRequest.RENDER_ALL) {
+            StringBuilder sb = new StringBuilder();
+            int cnt = 0;
+            for (int i = request.getFrom(); i <= request.getTo(); i++) {
+                if (cnt++ > 0) {
+                    sb.append(",");
+                }
+                sb.append(i);
+            }
+            args.add(sb.toString());
+        }
+        return args.toArray(new String[0]);
+    }
+}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/PDDocumentRenderer.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/PDDocumentRenderer.java
new file mode 100644
index 000000000..7cecd9a23
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/PDDocumentRenderer.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.renderer.pdf;
+
+import org.apache.tika.renderer.Renderer;
+
+/**
+ * Marker interface; it declares no methods of its own.  The PDFParser uses
+ * it to figure out whether it needs to pass on the PDDocument or create a
+ * temp file to be used by a file-based renderer down the road.
+ */
+public interface PDDocumentRenderer extends Renderer {
+}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/PDFBoxRenderer.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/PDFBoxRenderer.java
new file mode 100644
index 000000000..e5c5d8973
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/PDFBoxRenderer.java
@@ -0,0 +1,198 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.renderer.pdf;
+
+import java.awt.image.BufferedImage;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.commons.io.IOExceptionWithCause;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.rendering.ImageType;
+import org.apache.pdfbox.rendering.PDFRenderer;
+import org.apache.pdfbox.tools.imageio.ImageIOUtil;
+
+import org.apache.tika.config.Initializable;
+import org.apache.tika.config.InitializableProblemHandler;
+import org.apache.tika.config.Param;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.extractor.EmbeddedDocumentUtil;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.Rendering;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.renderer.PageBasedRenderResults;
+import org.apache.tika.renderer.PageRangeRequest;
+import org.apache.tika.renderer.RenderRequest;
+import org.apache.tika.renderer.RenderResult;
+import org.apache.tika.renderer.RenderResults;
+import org.apache.tika.renderer.RenderingTracker;
+
+/**
+ * Renders PDF pages to image files with PDFBox.  Unless changed through the
+ * setters, pages are rendered at 300 dpi, in gray, as tiff.
+ */
+public class PDFBoxRenderer implements PDDocumentRenderer, Initializable {
+
+    Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.application("pdf"));
+
+    /**
+     * This is the amount of time it takes for PDFBox to render the page
+     */
+    public static final Property PDFBOX_RENDERING_TIME_MS =
+            Property.externalReal(Rendering.RENDERING_PREFIX + "pdfbox-rendering-ms");
+
+    /**
+     * This is the amount of time it takes for PDFBox/java to write the image after
+     * it has been rendered into a BufferedImage.  Some formats take much longer
+     * to encode than others.
+     */
+    public static final Property PDFBOX_IMAGE_WRITING_TIME_MS =
+            Property.externalReal(Rendering.RENDERING_PREFIX + "pdfbox-image-writing-ms");
+
+    private int dpi = 300;
+    private ImageType imageType = ImageType.GRAY;
+    private String imageFormatName = "tiff";
+
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Renders the requested pages.  If the stream carries an already-open
+     * {@link PDDocument} as its open container, that document is used and is
+     * left open; otherwise the document is loaded from the stream and closed
+     * before this method returns.
+     *
+     * @param is the PDF to render
+     * @param metadata metadata of the PDF
+     * @param parseContext context; a {@link RenderingTracker} is added if none is present
+     * @param requests the requests to run, in order
+     * @return one result per requested page; a page that failed to render is
+     *         reported with an EXCEPTION status rather than by throwing
+     * @throws IOException if the document cannot be loaded or closed
+     * @throws TikaException if a request is not a {@link PageRangeRequest}
+     */
+    @Override
+    public RenderResults render(InputStream is, Metadata metadata, ParseContext parseContext,
+                                RenderRequest... requests) throws IOException, TikaException {
+
+        TikaInputStream tis = TikaInputStream.get(is);
+        PDDocument pdDocument;
+        boolean mustClose = false;
+        //only reuse the open container if it really is a PDDocument
+        Object openContainer = tis.getOpenContainer();
+        if (openContainer instanceof PDDocument) {
+            pdDocument = (PDDocument) openContainer;
+        } else {
+            pdDocument = PDDocument.load(tis);
+            mustClose = true;
+        }
+        try {
+            PageBasedRenderResults results =
+                    new PageBasedRenderResults(new TemporaryResources());
+            for (RenderRequest renderRequest : requests) {
+                processRequest(renderRequest, pdDocument, metadata, parseContext, results);
+            }
+            return results;
+        } finally {
+            if (mustClose) {
+                pdDocument.close();
+            }
+        }
+    }
+
+    /**
+     * Dispatches a single request: every page for RENDER_ALL, otherwise the
+     * inclusive page range of the request.
+     *
+     * @throws TikaException if the request is not a {@link PageRangeRequest}
+     */
+    private void processRequest(RenderRequest renderRequest, PDDocument pdDocument,
+                                Metadata metadata, ParseContext parseContext,
+                                PageBasedRenderResults results) throws TikaException {
+        if (PageRangeRequest.RENDER_ALL.equals(renderRequest)) {
+            renderRange(pdDocument, 1, pdDocument.getNumberOfPages(),
+                    metadata, parseContext, results);
+        } else if (renderRequest instanceof PageRangeRequest) {
+            PageRangeRequest rangeRequest = (PageRangeRequest) renderRequest;
+            renderRange(pdDocument, rangeRequest.getFrom(), rangeRequest.getTo(),
+                    metadata, parseContext, results);
+        } else {
+            //don't silently return empty results for a request we can't handle
+            throw new TikaException("I regret that this renderer can only handle " +
+                    "PageRangeRequests, not " +
+                    (renderRequest == null ? "null" : renderRequest.getClass()));
+        }
+    }
+
+    /**
+     * Renders the pages <code>start</code> to <code>endInclusive</code>; page
+     * numbers are 1-based.  An IOException on one page is recorded in that
+     * page's metadata and does not stop the remaining pages.
+     */
+    private void renderRange(PDDocument pdDocument, int start, int endInclusive, Metadata metadata,
+                                    ParseContext parseContext, PageBasedRenderResults results) {
+        PDFRenderer renderer = new PDFRenderer(pdDocument);
+        RenderingTracker tracker = parseContext.get(RenderingTracker.class);
+        if (tracker == null) {
+            tracker = new RenderingTracker();
+            parseContext.set(RenderingTracker.class, tracker);
+        }
+        for (int i = start; i <= endInclusive; i++) {
+            int id = tracker.getNextId();
+            Metadata m = new Metadata();
+            m.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
+                    TikaCoreProperties.EmbeddedResourceType.RENDERING.name());
+            try {
+                m.set(Rendering.PAGE_NUMBER, i);
+                Path imagePath = renderPage(renderer, id, i, m);
+                results.add(new RenderResult(RenderResult.STATUS.SUCCESS, id, imagePath, m));
+            } catch (IOException e) {
+                EmbeddedDocumentUtil.recordException(e, m);
+                results.add(new RenderResult(RenderResult.STATUS.EXCEPTION, id, null, m));
+            }
+        }
+    }
+
+
+    /**
+     * Renders one page into a new temp file and records the rendering and
+     * image writing times in <code>metadata</code>.
+     *
+     * @param pageNumber 1-based page number
+     * @return the path of the image file
+     * @throws IOException if rendering or writing fails; the temp file is deleted
+     *                     in that case
+     */
+    private Path renderPage(PDFRenderer renderer, int id, int pageNumber, Metadata metadata)
+            throws IOException {
+
+        Path tmpFile = Files.createTempFile("tika-pdfbox-rendering-",
+                "-" + id + "-" + pageNumber + "." + imageFormatName);
+        boolean success = false;
+        try {
+            long start = System.currentTimeMillis();
+            BufferedImage image = renderer.renderImageWithDPI(pageNumber - 1, dpi, imageType);
+            long renderingElapsed = System.currentTimeMillis() - start;
+            metadata.set(PDFBOX_RENDERING_TIME_MS, renderingElapsed);
+            start = System.currentTimeMillis();
+            try (OutputStream os = Files.newOutputStream(tmpFile)) {
+                ImageIOUtil.writeImage(image, imageFormatName, os, dpi);
+            }
+            long elapsedWrite = System.currentTimeMillis() - start;
+            metadata.set(PDFBOX_IMAGE_WRITING_TIME_MS, elapsedWrite);
+            metadata.set(Rendering.RENDERED_MS, renderingElapsed + elapsedWrite);
+            success = true;
+        } catch (SecurityException e) {
+            //throw SecurityExceptions immediately
+            throw e;
+        } catch (IOException | RuntimeException e) {
+            throw new IOExceptionWithCause(e);
+        } finally {
+            if (!success) {
+                //nobody will ever see this file; don't leave it behind
+                try {
+                    Files.deleteIfExists(tmpFile);
+                } catch (IOException ignored) {
+                    //the rendering failure is the exception worth reporting
+                }
+            }
+        }
+        return tmpFile;
+    }
+
+    @Override
+    public void initialize(Map<String, Param> params) throws TikaConfigException {
+        //check file format names
+    }
+
+    @Override
+    public void checkInitialization(InitializableProblemHandler problemHandler)
+            throws TikaConfigException {
+        //nothing to check yet
+    }
+
+    /**
+     * @param dpi the dpi to render pages at; it is also passed to the image writer
+     */
+    public void setDPI(int dpi) {
+        this.dpi = dpi;
+    }
+
+
+    /**
+     * @param imageType the PDFBox image type to render pages with
+     */
+    public void setImageType(ImageType imageType) {
+        this.imageType = imageType;
+    }
+
+    /**
+     * @param imageFormatName the format name handed to the image writer; it is
+     *                        also used as the extension of the rendered files
+     */
+    public void setImageFormatName(String imageFormatName) {
+        this.imageFormatName = imageFormatName;
+    }
+}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/PDFRenderingState.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/PDFRenderingState.java
new file mode 100644
index 000000000..51ea0ae96
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/PDFRenderingState.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.renderer.pdf;
+
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.renderer.RenderResults;
+import org.apache.tika.renderer.RenderingState;
+
+/**
+ * Rendering state for a PDF: the {@link TikaInputStream} given at construction
+ * and, once they have been set, the results of rendering.
+ */
+public class PDFRenderingState extends RenderingState {
+
+    //assigned once in the constructor and never replaced
+    private final TikaInputStream tis;
+
+    //null until setRenderResults is called
+    private RenderResults renderResults;
+
+    /**
+     * @param tis the stream to hold; it is stored as is
+     */
+    public PDFRenderingState(TikaInputStream tis) {
+        this.tis = tis;
+    }
+
+    /**
+     * @return the stream passed to the constructor
+     */
+    public TikaInputStream getTikaInputStream() {
+        return tis;
+    }
+
+
+    /**
+     * @param renderResults the results to store, replacing any earlier value
+     */
+    public void setRenderResults(RenderResults renderResults) {
+        this.renderResults = renderResults;
+    }
+
+    /**
+     * @return the stored results, or <code>null</code> if none have been set
+     */
+    public RenderResults getRenderResults() {
+        return renderResults;
+    }
+}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFRenderingTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFRenderingTest.java
new file mode 100644
index 000000000..ed61e2a02
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFRenderingTest.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.pdf;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.junit.jupiter.api.Test;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Rendering;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+
+/**
+ * Tests rendering of PDF pages with the renderer configured in
+ * tika-rendering-config.xml.
+ */
+public class PDFRenderingTest extends TikaTest {
+
+
+    @Test
+    public void testBasic() throws Exception {
+        ParseContext parseContext = configureParseContext();
+        TikaConfig config = getConfig("tika-rendering-config.xml");
+        Parser p = new AutoDetectParser(config);
+        List<Metadata> metadataList = getRecursiveMetadata("testPDF.pdf", p, parseContext);
+        Map<Integer, byte[]> embedded =
+                ((RenderCaptureExtractor)parseContext.get(EmbeddedDocumentExtractor.class))
+                        .getEmbedded();
+
+        assertEquals(1, embedded.size());
+        assertTrue(embedded.containsKey(0));
+        //what else can we do to test this?  File type == tiff? Run OCR?
+        assertTrue(embedded.get(0).length > 1000);
+
+        assertEquals(2, metadataList.size());
+        Metadata tiffMetadata = metadataList.get(1);
+        assertEquals("RENDERING", tiffMetadata.get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE));
+        assertEquals(1, tiffMetadata.getInt(Rendering.PAGE_NUMBER));
+    }
+
+    /**
+     * Loads a config that sits next to this class on the classpath.
+     *
+     * @throws IOException if the resource cannot be found or read
+     */
+    private TikaConfig getConfig(String path) throws TikaException, IOException, SAXException {
+        try (InputStream is = PDFRenderingTest.class.getResourceAsStream(path)) {
+            //fail with a clear message instead of handing null to TikaConfig
+            if (is == null) {
+                throw new IOException("Couldn't find the test config on the classpath: " + path);
+            }
+            return new TikaConfig(is);
+        }
+    }
+
+    /**
+     * Builds a context that captures embedded documents and asks the PDFParser
+     * for rendered pages.
+     */
+    private ParseContext configureParseContext() {
+        ParseContext parseContext = new ParseContext();
+        parseContext.set(EmbeddedDocumentExtractor.class, new RenderCaptureExtractor(parseContext));
+        PDFParserConfig config = new PDFParserConfig();
+        config.setImageStrategy(PDFParserConfig.IMAGE_STRATEGY.RENDERED_PAGES);
+        parseContext.set(PDFParserConfig.class, config);
+        return parseContext;
+    }
+
+
+    /**
+     * Keeps the bytes of every embedded document, keyed by the order in which
+     * it was seen, and then parses the document as usual.
+     */
+    private static class RenderCaptureExtractor extends ParsingEmbeddedDocumentExtractor {
+        private int count = 0;
+        private final Map<Integer, byte[]> embedded = new HashMap<>();
+
+        public RenderCaptureExtractor(ParseContext context) {
+            super(context);
+        }
+
+        @Override
+        public void parseEmbedded(InputStream stream, ContentHandler handler, Metadata metadata,
+                                  boolean outputHtml) throws SAXException, IOException {
+            TikaInputStream tstream = TikaInputStream.get(stream);
+            byte[] bytes = Files.readAllBytes(tstream.getPath());
+            embedded.put(count++, bytes);
+            //parse from a fresh stream on the same file
+            try (InputStream is = Files.newInputStream(tstream.getPath())) {
+                super.parseEmbedded(is, handler, metadata, outputHtml);
+            }
+        }
+
+        public Map<Integer, byte[]> getEmbedded() {
+            return embedded;
+        }
+    }
+}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/org/apache/tika/parser/pdf/tika-rendering-config.xml b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/org/apache/tika/parser/pdf/tika-rendering-config.xml
new file mode 100644
index 000000000..5b1351662
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/org/apache/tika/parser/pdf/tika-rendering-config.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!-- Test configuration: registers DefaultParser as the parser and
+     PDFBoxRenderer as the only renderer. -->
+<properties>
+    <parsers>
+        <parser class="org.apache.tika.parser.DefaultParser"/>
+    </parsers>
+    <renderers>
+        <renderer class="org.apache.tika.renderer.pdf.PDFBoxRenderer"/>
+    </renderers>
+</properties>
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/crypto/TSDParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/crypto/TSDParserTest.java
index 111825101..00cccbdc9 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/crypto/TSDParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/crypto/TSDParserTest.java
@@ -26,14 +26,20 @@ import org.junit.jupiter.api.Test;
 import org.apache.tika.TikaTest;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.pdf.PDFParserConfig;
 
 public class TSDParserTest extends TikaTest {
 
     @Test
     public void testBrokenPdf() throws Exception {
+        ParseContext parseContext = new ParseContext();
+        PDFParserConfig config = new PDFParserConfig();
+        config.setOcrStrategy(PDFParserConfig.OCR_STRATEGY.NO_OCR);
+        parseContext.set(PDFParserConfig.class, config);
         //make sure that embedded file appears in list
         //and make sure embedded exception is recorded
-        List<Metadata> list = getRecursiveMetadata("testTSD_broken_pdf.tsd");
+        List<Metadata> list = getRecursiveMetadata("testTSD_broken_pdf.tsd", parseContext);
         assertEquals(2, list.size());
         assertEquals("application/pdf", list.get(1).get(Metadata.CONTENT_TYPE));
         assertNotNull(list.get(1).get(TikaCoreProperties.EMBEDDED_EXCEPTION));
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index 8c8d4d068..bac5fc6a9 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -46,6 +46,7 @@ import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.RecursiveParserWrapper;
+import org.apache.tika.parser.external.ExternalParser;
 import org.apache.tika.parser.ocr.TesseractOCRConfig;
 import org.apache.tika.parser.ocr.TesseractOCRParser;
 import org.apache.tika.parser.xml.XMLProfiler;
@@ -62,6 +63,8 @@ public class PDFParserTest extends TikaTest {
     public static Level PDFBOX_LOG_LEVEL = Level.INFO;
     private static Boolean hasTesseract = null;
 
+    private static Boolean hasMuPDF = null;
+
     public static boolean canRunOCR() throws TikaConfigException {
         if (hasTesseract != null) {
             return hasTesseract;
@@ -70,6 +73,14 @@ public class PDFParserTest extends TikaTest {
         return hasTesseract;
     }
 
+    /**
+     * Reports the result of {@code ExternalParser.check} on the command
+     * {@code mutool -v}. The result is computed on the first call and cached in
+     * the static {@code hasMuPDF} field for later calls.
+     *
+     * @return the cached or freshly computed check result
+     * @throws TikaConfigException declared for callers; see {@code ExternalParser.check}
+     */
+    public static boolean hasMuPDF() throws TikaConfigException {
+        if (hasMuPDF == null) {
+            //first call: run the check once and remember the answer
+            hasMuPDF = ExternalParser.check(new String[]{"mutool", "-v"});
+        }
+        return hasMuPDF;
+    }
+
     @BeforeAll
     public static void setup() {
         //remember default logging level, but turn off for PDFParserTest
@@ -440,4 +451,20 @@ public class PDFParserTest extends TikaTest {
         }
     }
 
+    /**
+     * Parses testOCR.pdf with the MuPDF rendering config and checks that the
+     * extracted text contains "Happy". Skipped unless both OCR and mutool are available.
+     */
+    @Test
+    public void testMuPDFInOCR() throws Exception {
+        //TODO -- need to add "rendered by" to confirm that mutool was actually called
+        //and that the PDFParser did not back off to PDFBox
+        assumeTrue(canRunOCR(), "can run OCR");
+        assumeTrue(hasMuPDF(), "has mupdf");
+        try (InputStream configStream = getResourceAsStream(
+                "/configs/tika-rendering-mupdf-config.xml")) {
+            assertNotNull(configStream);
+            Parser parser = new AutoDetectParser(new TikaConfig(configStream));
+            String extracted =
+                    getText(getResourceAsStream("/test-documents/testOCR.pdf"), parser);
+            assertContains("Happy", extracted.trim());
+        }
+    }
+
 }
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-rendering-mupdf-config.xml b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-rendering-mupdf-config.xml
new file mode 100644
index 000000000..1034d05eb
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-rendering-mupdf-config.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!-- Test configuration: registers DefaultParser as the parser and
+     MuPDFRenderer as the only renderer. -->
+<properties>
+    <parsers>
+        <parser class="org.apache.tika.parser.DefaultParser"/>
+    </parsers>
+    <renderers>
+        <renderer class="org.apache.tika.renderer.pdf.MuPDFRenderer"/>
+    </renderers>
+</properties>