You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/04/26 21:51:03 UTC

[tika] 01/01: TIKA-3571 -- WIP, checkpoint commit. Do not merge.

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-3571
in repository https://gitbox.apache.org/repos/asf/tika.git

commit c3261d6d0b272f452b1343e2491da7b75adb64db
Author: tallison <ta...@apache.org>
AuthorDate: Tue Apr 26 17:50:44 2022 -0400

    TIKA-3571 -- WIP, checkpoint commit. Do not merge.
---
 .../java/org/apache/tika/config/TikaConfig.java    | 130 +++++++++++++++++--
 .../java/org/apache/tika/metadata/Rendering.java   |  27 ++++
 .../apache/tika/metadata/TikaCoreProperties.java   |   5 +-
 .../java/org/apache/tika/parser/DefaultParser.java |  34 ++++-
 .../org/apache/tika/parser/RenderingParser.java    |  24 ++++
 .../apache/tika/renderer/CompositeRenderer.java    |  99 +++++++++++++++
 .../org/apache/tika/renderer/RenderResult.java     |  52 ++++++++
 .../org/apache/tika/renderer/RenderResults.java    |  53 ++++++++
 .../java/org/apache/tika/renderer/Renderer.java    |  49 +++++++
 .../apache/tika/parser/pdf/AbstractPDF2XHTML.java  |  34 +++--
 .../java/org/apache/tika/parser/pdf/OCR2XHTML.java |   8 +-
 .../java/org/apache/tika/parser/pdf/PDF2XHTML.java |  11 +-
 .../java/org/apache/tika/parser/pdf/PDFParser.java |  32 ++++-
 .../apache/tika/parser/pdf/PDFParserConfig.java    |  11 ++
 .../apache/tika/renderer/pdf/PDFBoxRenderer.java   | 141 +++++++++++++++++++++
 15 files changed, 668 insertions(+), 42 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index 1606262f6..d16f6f171 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@ -74,7 +74,10 @@ import org.apache.tika.parser.CompositeParser;
 import org.apache.tika.parser.DefaultParser;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.ParserDecorator;
+import org.apache.tika.parser.RenderingParser;
 import org.apache.tika.parser.multiple.AbstractMultipleParser;
+import org.apache.tika.renderer.CompositeRenderer;
+import org.apache.tika.renderer.Renderer;
 import org.apache.tika.utils.AnnotationUtils;
 import org.apache.tika.utils.XMLReaderUtils;
 
@@ -95,6 +98,7 @@ public class TikaConfig {
     private final MimeTypes mimeTypes;
     private final ExecutorService executorService;
     private final EncodingDetector encodingDetector;
+    private final Renderer renderer;
     private final MetadataFilter metadataFilter;
     private final AutoDetectParserConfig autoDetectParserConfig;
 
@@ -155,12 +159,14 @@ public class TikaConfig {
         TranslatorXmlLoader translatorLoader = new TranslatorXmlLoader();
         ExecutorServiceXmlLoader executorLoader = new ExecutorServiceXmlLoader();
         EncodingDetectorXmlLoader encodingDetectorXmlLoader = new EncodingDetectorXmlLoader();
+        RendererXmlLoader rendererXmlLoader = new RendererXmlLoader();
         updateXMLReaderUtils(element);
         this.mimeTypes = typesFromDomElement(element);
         this.detector = detectorLoader.loadOverall(element, mimeTypes, loader);
         this.encodingDetector = encodingDetectorXmlLoader.loadOverall(element, mimeTypes, loader);
+        this.renderer = rendererXmlLoader.loadOverall(element, mimeTypes, loader);
 
-        ParserXmlLoader parserLoader = new ParserXmlLoader(encodingDetector);
+        ParserXmlLoader parserLoader = new ParserXmlLoader(encodingDetector, renderer);
         this.parser = parserLoader.loadOverall(element, mimeTypes, loader);
         this.translator = translatorLoader.loadOverall(element, mimeTypes, loader);
         this.executorService = executorLoader.loadOverall(element, mimeTypes, loader);
@@ -187,7 +193,8 @@ public class TikaConfig {
         this.mimeTypes = getDefaultMimeTypes(loader);
         this.detector = getDefaultDetector(mimeTypes, serviceLoader);
         this.encodingDetector = getDefaultEncodingDetector(serviceLoader);
-        this.parser = getDefaultParser(mimeTypes, serviceLoader, encodingDetector);
+        this.renderer = getDefaultRenderer(serviceLoader);
+        this.parser = getDefaultParser(mimeTypes, serviceLoader, encodingDetector, renderer);
         this.translator = getDefaultTranslator(serviceLoader);
         this.executorService = getDefaultExecutorService();
         this.metadataFilter = new NoOpFilter();
@@ -223,7 +230,8 @@ public class TikaConfig {
             this.serviceLoader = new ServiceLoader();
             this.mimeTypes = getDefaultMimeTypes(getContextClassLoader());
             this.encodingDetector = getDefaultEncodingDetector(serviceLoader);
-            this.parser = getDefaultParser(mimeTypes, serviceLoader, encodingDetector);
+            this.renderer = getDefaultRenderer(serviceLoader);
+            this.parser = getDefaultParser(mimeTypes, serviceLoader, encodingDetector, renderer);
             this.detector = getDefaultDetector(mimeTypes, serviceLoader);
             this.translator = getDefaultTranslator(serviceLoader);
             this.executorService = getDefaultExecutorService();
@@ -237,6 +245,7 @@ public class TikaConfig {
                 serviceLoader = serviceLoaderFromDomElement(element, tmpServiceLoader.getLoader());
                 DetectorXmlLoader detectorLoader = new DetectorXmlLoader();
                 EncodingDetectorXmlLoader encodingDetectorLoader = new EncodingDetectorXmlLoader();
+                RendererXmlLoader rendererLoader = new RendererXmlLoader();
                 TranslatorXmlLoader translatorLoader = new TranslatorXmlLoader();
                 ExecutorServiceXmlLoader executorLoader = new ExecutorServiceXmlLoader();
 
@@ -244,8 +253,9 @@ public class TikaConfig {
                 this.encodingDetector =
                         encodingDetectorLoader.loadOverall(element, mimeTypes, serviceLoader);
 
+                this.renderer = rendererLoader.loadOverall(element, mimeTypes, serviceLoader);
 
-                ParserXmlLoader parserLoader = new ParserXmlLoader(encodingDetector);
+                ParserXmlLoader parserLoader = new ParserXmlLoader(encodingDetector, renderer);
                 this.parser = parserLoader.loadOverall(element, mimeTypes, serviceLoader);
                 this.detector = detectorLoader.loadOverall(element, mimeTypes, serviceLoader);
                 this.translator = translatorLoader.loadOverall(element, mimeTypes, serviceLoader);
@@ -273,9 +283,12 @@ public class TikaConfig {
         return new DefaultEncodingDetector(loader);
     }
 
+    protected static CompositeRenderer getDefaultRenderer(ServiceLoader loader) {
+        return new CompositeRenderer(loader);
+    }
     private static CompositeParser getDefaultParser(MimeTypes types, ServiceLoader loader,
-                                                    EncodingDetector encodingDetector) {
-        return new DefaultParser(types.getMediaTypeRegistry(), loader, encodingDetector);
+                                                    EncodingDetector encodingDetector, Renderer renderer) {
+        return new DefaultParser(types.getMediaTypeRegistry(), loader, encodingDetector, renderer);
     }
 
     private static Translator getDefaultTranslator(ServiceLoader loader) {
@@ -811,9 +824,11 @@ public class TikaConfig {
     private static class ParserXmlLoader extends XmlLoader<CompositeParser, Parser> {
 
         private final EncodingDetector encodingDetector;
+        private final Renderer renderer;
 
-        private ParserXmlLoader(EncodingDetector encodingDetector) {
+        private ParserXmlLoader(EncodingDetector encodingDetector, Renderer renderer) {
             this.encodingDetector = encodingDetector;
+            this.renderer = renderer;
         }
 
         boolean supportsComposite() {
@@ -860,7 +875,7 @@ public class TikaConfig {
 
         @Override
         CompositeParser createDefault(MimeTypes mimeTypes, ServiceLoader loader) {
-            return getDefaultParser(mimeTypes, loader, encodingDetector);
+            return getDefaultParser(mimeTypes, loader, encodingDetector, renderer);
         }
 
         @Override
@@ -948,12 +963,18 @@ public class TikaConfig {
         Parser newInstance(Class<? extends Parser> loadedClass)
                 throws IllegalAccessException, InstantiationException, NoSuchMethodException,
                 InvocationTargetException {
+            Parser parser = null;
             if (AbstractEncodingDetectorParser.class.isAssignableFrom(loadedClass)) {
                 Constructor ctor = loadedClass.getConstructor(EncodingDetector.class);
-                return (Parser) ctor.newInstance(encodingDetector);
+                parser = (Parser) ctor.newInstance(encodingDetector);
             } else {
-                return loadedClass.newInstance();
+                parser = loadedClass.newInstance();
+            }
+
+            if (parser instanceof RenderingParser) {
+                ((RenderingParser)parser).setRenderer(renderer);
             }
+            return parser;
         }
 
         @Override
@@ -1306,7 +1327,7 @@ public class TikaConfig {
                     c = encodingDetectorClass.getConstructor(List.class);
                     encodingDetector = c.newInstance(childEncodingDetectors);
                 } catch (NoSuchMethodException me) {
-                    LOG.debug("couldn't find constructor for EncodingDetecto(List) for {}",
+                    LOG.debug("couldn't find constructor for EncodingDetector(List) for {}",
                             encodingDetectorClass);
                 }
             }
@@ -1320,4 +1341,91 @@ public class TikaConfig {
         }
     }
 
+    private static class RendererXmlLoader
+            extends XmlLoader<Renderer, Renderer> {
+
+        boolean supportsComposite() {
+            return true;
+        }
+
+        String getParentTagName() {
+            return "renderers";
+        }
+
+        String getLoaderTagName() {
+            return "renderer";
+        }
+
+        @Override
+        Class<? extends Renderer> getLoaderClass() {
+            return Renderer.class;
+        }
+
+
+        @Override
+        boolean isComposite(Renderer loaded) {
+            return loaded instanceof CompositeRenderer;
+        }
+
+        @Override
+        boolean isComposite(Class<? extends Renderer> loadedClass) {
+            return CompositeRenderer.class.isAssignableFrom(loadedClass);
+        }
+
+        @Override
+        Renderer preLoadOne(Class<? extends Renderer> loadedClass, String classname,
+                                    MimeTypes mimeTypes) throws TikaException {
+            // No renderer classes are excluded from config here, so nothing is pre-loaded;
+            // returning null continues with normal loading
+            return null;
+        }
+
+        @Override
+        Renderer createDefault(MimeTypes mimeTypes, ServiceLoader loader) {
+            return getDefaultRenderer(loader);
+        }
+
+        @Override
+        Renderer createComposite(List<Renderer> renderers,
+                                                  MimeTypes mimeTypes, ServiceLoader loader) {
+            return new CompositeRenderer(renderers);
+        }
+
+        @Override
+        Renderer createComposite(Class<? extends Renderer> rendererClass,
+                                         List<Renderer> childRenderers,
+                                         Set<Class<? extends Renderer>> excludeRenderers,
+                                         Map<String, Param> params, MimeTypes mimeTypes,
+                                         ServiceLoader loader)
+                throws InvocationTargetException, IllegalAccessException, InstantiationException {
+            Renderer renderer = null;
+            Constructor<? extends Renderer> c;
+            // Try the composite renderer constructors in order: (ServiceLoader, Collection)
+            // first, then (List). Returns null if rendererClass declares neither.
+            if (renderer == null) {
+                try {
+                    c = rendererClass.getConstructor(ServiceLoader.class, Collection.class);
+                    renderer = c.newInstance(loader, excludeRenderers);
+                } catch (NoSuchMethodException me) {
+                    LOG.debug("couldn't find constructor for service loader + collection for {}",
+                            rendererClass);
+                }
+            }
+            if (renderer == null) {
+                try {
+                    c = rendererClass.getConstructor(List.class);
+                    renderer = c.newInstance(childRenderers);
+                } catch (NoSuchMethodException me) {
+                    LOG.debug("couldn't find constructor for Renderer(List) for {}",
+                            rendererClass);
+                }
+            }
+            return renderer;
+        }
+
+        @Override
+        Renderer decorate(Renderer created, Element element) {
+            return created; // No decoration of Renderers
+        }
+    }
 }
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/Rendering.java b/tika-core/src/main/java/org/apache/tika/metadata/Rendering.java
new file mode 100644
index 000000000..1ff521aa7
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/metadata/Rendering.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.tika.metadata;
+
+/**
+ * Metadata property definitions for rendered content. Every key is built by
+ * prepending {@link #RENDERING_PREFIX} to a key-specific suffix.
+ */
+public interface Rendering {
+    // Namespace prefix shared by all rendering keys
+    String RENDERING_PREFIX = "rendering:";
+
+    // NOTE(review): the three suffixes below use three different naming styles
+    // (page_number, Rendered-By, rendering-time-ms) -- confirm whether that is intended
+    Property PAGE_NUMBER = Property.externalInteger(RENDERING_PREFIX + "page_number");
+    Property RENDERED_BY = Property.externalTextBag(RENDERING_PREFIX + "Rendered-By");
+    Property RENDERED_MS = Property.externalInteger(RENDERING_PREFIX + "rendering-time-ms");
+}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
index ba138c54f..21581a482 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
@@ -277,7 +277,7 @@ public interface TikaCoreProperties {
     Property EMBEDDED_RESOURCE_TYPE = Property.internalClosedChoise(EMBEDDED_RESOURCE_TYPE_KEY,
             EmbeddedResourceType.ATTACHMENT.toString(), EmbeddedResourceType.INLINE.toString(),
             EmbeddedResourceType.METADATA.toString(), EmbeddedResourceType.MACRO.toString(),
-            EmbeddedResourceType.THUMBNAIL.toString());
+            EmbeddedResourceType.THUMBNAIL.toString(), EmbeddedResourceType.RENDERING.toString());
     Property HAS_SIGNATURE = Property.internalBoolean("hasSignature");
 
 
@@ -302,6 +302,7 @@ public interface TikaCoreProperties {
         MACRO, //any code that is intended to be run by the application
         METADATA, //e.g. xmp, xfa
         FONT,//embedded font files
-        THUMBNAIL//TODO: set this in parsers that handle thumbnails
+        THUMBNAIL, //TODO: set this in parsers that handle thumbnails
+        RENDERING //if a file has been rendered
     }
 }
diff --git a/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java b/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java
index 2abeeed52..336adee93 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java
@@ -27,6 +27,8 @@ import org.apache.tika.detect.DefaultEncodingDetector;
 import org.apache.tika.detect.EncodingDetector;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MediaTypeRegistry;
+import org.apache.tika.renderer.CompositeRenderer;
+import org.apache.tika.renderer.Renderer;
 import org.apache.tika.utils.ServiceLoaderUtils;
 
 /**
@@ -46,25 +48,27 @@ public class DefaultParser extends CompositeParser {
 
     public DefaultParser(MediaTypeRegistry registry, ServiceLoader loader,
                          Collection<Class<? extends Parser>> excludeParsers,
-                         EncodingDetector encodingDetector) {
-        super(registry, getDefaultParsers(loader, encodingDetector, excludeParsers));
+                         EncodingDetector encodingDetector, Renderer renderer) {
+        super(registry, getDefaultParsers(loader, encodingDetector, renderer, excludeParsers));
         this.loader = loader;
     }
 
     public DefaultParser(MediaTypeRegistry registry, ServiceLoader loader,
                          Collection<Class<? extends Parser>> excludeParsers) {
         super(registry,
-                getDefaultParsers(loader, new DefaultEncodingDetector(loader), excludeParsers));
+                getDefaultParsers(loader, new DefaultEncodingDetector(loader),
+                        new CompositeRenderer(loader), excludeParsers));
         this.loader = loader;
     }
 
     public DefaultParser(MediaTypeRegistry registry, ServiceLoader loader,
-                         EncodingDetector encodingDetector) {
-        this(registry, loader, Collections.EMPTY_SET, encodingDetector);
+                         EncodingDetector encodingDetector, Renderer renderer) {
+        this(registry, loader, Collections.EMPTY_SET, encodingDetector, renderer);
     }
 
     public DefaultParser(MediaTypeRegistry registry, ServiceLoader loader) {
-        this(registry, loader, Collections.EMPTY_SET, new DefaultEncodingDetector(loader));
+        this(registry, loader, Collections.EMPTY_SET, new DefaultEncodingDetector(loader),
+                new CompositeRenderer(loader));
     }
 
     public DefaultParser(MediaTypeRegistry registry, ClassLoader loader) {
@@ -94,6 +98,7 @@ public class DefaultParser extends CompositeParser {
      */
     private static List<Parser> getDefaultParsers(ServiceLoader loader,
                                                   EncodingDetector encodingDetector,
+                                                  Renderer renderer,
                                                   Collection<Class<? extends Parser>>
                                                           excludeParsers) {
         List<Parser> parsers =
@@ -104,6 +109,11 @@ public class DefaultParser extends CompositeParser {
                 setEncodingDetector(p, encodingDetector);
             }
         }
+        if (renderer != null) {
+            for (Parser p : parsers) {
+                setRenderer(p, renderer);
+            }
+        }
         ServiceLoaderUtils.sortLoadedClasses(parsers);
         return parsers;
     }
@@ -122,6 +132,18 @@ public class DefaultParser extends CompositeParser {
         }
     }
 
+    /**
+     * Hands the renderer to the given parser if it is a {@link RenderingParser}.
+     * Otherwise recurses into the component parsers of a {@link CompositeParser}
+     * or into the parser wrapped by a {@link ParserDecorator}. Parsers of any
+     * other kind are left untouched.
+     *
+     * @param p        parser to configure
+     * @param renderer renderer to pass down
+     */
+    private static void setRenderer(Parser p, Renderer renderer) {
+        // The branches are exclusive: a parser that is a RenderingParser is not
+        // searched for children or wrapped parsers
+        if (p instanceof RenderingParser) {
+            ((RenderingParser) p).setRenderer(renderer);
+        } else if (p instanceof CompositeParser) {
+            for (Parser child : ((CompositeParser) p).getAllComponentParsers()) {
+                setRenderer(child, renderer);
+            }
+        } else if (p instanceof ParserDecorator) {
+            setRenderer(((ParserDecorator) p).getWrappedParser(), renderer);
+        }
+    }
+
     @Override
     public Map<MediaType, Parser> getParsers(ParseContext context) {
         Map<MediaType, Parser> map = super.getParsers(context);
diff --git a/tika-core/src/main/java/org/apache/tika/parser/RenderingParser.java b/tika-core/src/main/java/org/apache/tika/parser/RenderingParser.java
new file mode 100644
index 000000000..0daae6be1
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/parser/RenderingParser.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */package org.apache.tika.parser;
+
+import org.apache.tika.renderer.Renderer;
+
+/**
+ * Implemented by parsers that accept a {@link Renderer}.
+ */
+public interface RenderingParser {
+
+    /**
+     * Sets the renderer on this parser.
+     *
+     * @param renderer renderer to use
+     */
+    void setRenderer(Renderer renderer);
+
+}
diff --git a/tika-core/src/main/java/org/apache/tika/renderer/CompositeRenderer.java b/tika-core/src/main/java/org/apache/tika/renderer/CompositeRenderer.java
new file mode 100644
index 000000000..ff5a52061
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/renderer/CompositeRenderer.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */package org.apache.tika.renderer;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.tika.config.Initializable;
+import org.apache.tika.config.InitializableProblemHandler;
+import org.apache.tika.config.Param;
+import org.apache.tika.config.ServiceLoader;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.utils.ServiceLoaderUtils;
+
+/**
+ * A {@link Renderer} that dispatches to one of a list of component renderers,
+ * chosen by the media type stored under {@link TikaCoreProperties#TYPE} in the
+ * metadata passed to {@link #render(InputStream, Metadata, ParseContext)}.
+ */
+public class CompositeRenderer implements Renderer, Initializable {
+
+    private Map<MediaType, Renderer> rendererMap = new HashMap<>();
+    private List<Renderer> renderers;
+
+    public CompositeRenderer(ServiceLoader serviceLoader) {
+        this(getDefaultRenderers(serviceLoader));
+    }
+
+    public CompositeRenderer(List<Renderer> renderers) {
+        this.renderers = renderers;
+        // Build the lookup eagerly. An instance that is constructed directly and
+        // never passed through initialize() would otherwise keep an empty map
+        // and reject every media type in render().
+        this.rendererMap = buildRendererMap(renderers);
+    }
+
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return rendererMap.keySet();
+    }
+
+    /**
+     * Renders the stream with the component renderer registered for the media
+     * type found in the metadata.
+     *
+     * @param is           stream to render
+     * @param metadata     metadata; must carry the media type under
+     *                     {@link TikaCoreProperties#TYPE}
+     * @param parseContext parse context, passed through to the component renderer
+     * @return whatever the selected component renderer returns
+     * @throws TikaException if the metadata has no type, the type cannot be
+     *                       parsed, or no component renderer supports it
+     */
+    @Override
+    public RenderResults render(InputStream is, Metadata metadata, ParseContext parseContext)
+            throws IOException, TikaException {
+        String mediaTypeString = metadata.get(TikaCoreProperties.TYPE);
+        if (mediaTypeString == null) {
+            throw new TikaException("need to specify file type in metadata");
+        }
+        MediaType mt = MediaType.parse(mediaTypeString);
+        if (mt == null) {
+            throw new TikaException("can't parse mediaType: " + mediaTypeString);
+        }
+        Renderer renderer = rendererMap.get(mt);
+        if (renderer == null) {
+            throw new TikaException("I regret I can't find a renderer for " + mt);
+        }
+        return renderer.render(is, metadata, parseContext);
+    }
+
+    /**
+     * Rebuilds the media type lookup from the component renderers. The params
+     * are not used.
+     */
+    @Override
+    public void initialize(Map<String, Param> params) throws TikaConfigException {
+        rendererMap = buildRendererMap(renderers);
+    }
+
+    @Override
+    public void checkInitialization(InitializableProblemHandler problemHandler)
+            throws TikaConfigException {
+        // nothing to check
+    }
+
+    // Maps every supported media type to its renderer. When several renderers
+    // claim the same type, the one later in the list wins. A null list yields
+    // an empty map.
+    private static Map<MediaType, Renderer> buildRendererMap(List<Renderer> renderers) {
+        Map<MediaType, Renderer> tmp = new ConcurrentHashMap<>();
+        if (renderers != null) {
+            ParseContext empty = new ParseContext();
+            for (Renderer renderer : renderers) {
+                for (MediaType mt : renderer.getSupportedTypes(empty)) {
+                    tmp.put(mt, renderer);
+                }
+            }
+        }
+        return Collections.unmodifiableMap(tmp);
+    }
+
+    private static List<Renderer> getDefaultRenderers(ServiceLoader loader) {
+        List<Renderer> staticRenderers =
+                loader.loadStaticServiceProviders(Renderer.class);
+
+        ServiceLoaderUtils.sortLoadedClasses(staticRenderers);
+        return staticRenderers;
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/renderer/RenderResult.java b/tika-core/src/main/java/org/apache/tika/renderer/RenderResult.java
new file mode 100644
index 000000000..9ed61e342
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/renderer/RenderResult.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.renderer;
+
+import java.nio.file.Path;
+
+import org.apache.tika.metadata.Metadata;
+
+/**
+ * The outcome of a single rendering: a status, the path of the rendered file
+ * and the metadata that goes with it. The three references are fixed at
+ * construction.
+ */
+public class RenderResult {
+
+    /** How the rendering ended. */
+    public enum STATUS {
+        SUCCESS,
+        EXCEPTION,
+        TIMEOUT
+    }
+    private final STATUS status;
+
+    private final Path path;
+    private final Metadata metadata;
+
+    /**
+     * @param status   how the rendering ended
+     * @param path     path of the rendered file
+     * @param metadata metadata for the rendering
+     */
+    public RenderResult(STATUS status, Path path, Metadata metadata) {
+        this.status = status;
+        this.path = path;
+        this.metadata = metadata;
+    }
+
+    /** @return the path given at construction */
+    public Path getPath() {
+        return path;
+    }
+
+    /** @return the metadata given at construction */
+    public Metadata getMetadata() {
+        return metadata;
+    }
+
+    /** @return the status given at construction */
+    public STATUS getStatus() {
+        return status;
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/renderer/RenderResults.java b/tika-core/src/main/java/org/apache/tika/renderer/RenderResults.java
new file mode 100644
index 000000000..12d60d3da
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/renderer/RenderResults.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.renderer;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.tika.io.TemporaryResources;
+
+/**
+ * Collects {@link RenderResult}s. Each added result that has a path registers
+ * a delete of that path with the {@link TemporaryResources} supplied at
+ * construction; {@link #close()} closes that TemporaryResources.
+ */
+public class RenderResults implements Closeable {
+
+    private List<RenderResult> results = new ArrayList<>();
+
+    private final TemporaryResources tmp;
+
+    /**
+     * @param tmp resources object that the cleanup of every added result is
+     *            registered with; closed by {@link #close()}
+     */
+    public RenderResults(TemporaryResources tmp) {
+        this.tmp = tmp;
+    }
+
+    /**
+     * Adds a result and, if it has a path, registers that path for deletion.
+     *
+     * @param result result to add
+     */
+    public void add(RenderResult result) {
+        // A result without a file has nothing to clean up; registering a delete
+        // for it would fail with a NullPointerException at close time
+        if (result.getPath() != null) {
+            tmp.addResource(new Closeable() {
+                @Override
+                public void close() throws IOException {
+                    // deleteIfExists rather than delete: a file that was already
+                    // removed or moved must not make the cleanup fail
+                    Files.deleteIfExists(result.getPath());
+                }
+            });
+        }
+        results.add(result);
+    }
+
+    /** @return the list of results added so far (the internal list, not a copy) */
+    public List<RenderResult> getResults() {
+        return results;
+    }
+
+    /**
+     * Closes the TemporaryResources given at construction.
+     *
+     * @throws IOException if closing the TemporaryResources fails
+     */
+    @Override
+    public void close() throws IOException {
+        tmp.close();
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/renderer/Renderer.java b/tika-core/src/main/java/org/apache/tika/renderer/Renderer.java
new file mode 100644
index 000000000..5f7cb536b
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/renderer/Renderer.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.renderer;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Serializable;
+import java.util.Set;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+
+/**
+ * Interface for a renderer.  This should be flexible enough to run on the initial design: PDF pages
+ * but also on portions of PDF pages as well as on other document types.
+ *
+ */
+public interface Renderer extends Serializable {
+
+
+    /**
+     * Returns the set of media types supported by this renderer when used
+     * with the given parse context.
+     *
+     * @param context parse context
+     * @return immutable set of media types
+     * @since Apache Tika 2.5.0
+     */
+    Set<MediaType> getSupportedTypes(ParseContext context);
+
+    /**
+     * Renders the content of the given stream.
+     *
+     * @param is           stream to render
+     * @param metadata     metadata associated with the stream
+     * @param parseContext parse context
+     * @return the results of the rendering
+     * @throws IOException   declared for implementations that read the stream
+     * @throws TikaException declared for implementations that fail to render
+     */
+    RenderResults render(InputStream is, Metadata metadata, ParseContext parseContext) throws IOException,
+            TikaException;
+}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
index 93dfbd119..02449d53d 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
@@ -109,6 +109,7 @@ import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
+import org.apache.tika.renderer.RenderResults;
 import org.apache.tika.sax.BodyContentHandler;
 import org.apache.tika.sax.EmbeddedContentHandler;
 import org.apache.tika.sax.XHTMLContentHandler;
@@ -140,6 +141,8 @@ class AbstractPDF2XHTML extends PDFTextStripper {
     final EmbeddedDocumentExtractor embeddedDocumentExtractor;
     final PDFParserConfig config;
     final Parser ocrParser;
+
+    final RenderResults renderResults;
     /**
      * Format used for signature dates
      * TODO Make this thread-safe
@@ -157,11 +160,12 @@ class AbstractPDF2XHTML extends PDFTextStripper {
     int totalCharsPerPage = 0;
 
     AbstractPDF2XHTML(PDDocument pdDocument, ContentHandler handler, ParseContext context,
-                      Metadata metadata, PDFParserConfig config) throws IOException {
+                      Metadata metadata, RenderResults renderResults, PDFParserConfig config) throws IOException {
         this.pdDocument = pdDocument;
         this.xhtml = new XHTMLContentHandler(handler, metadata);
         this.context = context;
         this.metadata = metadata;
+        this.renderResults = renderResults;
         this.config = config;
         embeddedDocumentExtractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context);
         if (config.getOcrStrategy() == NO_OCR) {
@@ -482,6 +486,23 @@ class AbstractPDF2XHTML extends PDFTextStripper {
             }
         }
 
+        try (TemporaryResources tmp = new TemporaryResources()) {
+            Path tmpFile = renderPage(tmp);
+
+            try (InputStream is = TikaInputStream.get(tmpFile)) {
+                metadata.set(TikaCoreProperties.CONTENT_TYPE_PARSER_OVERRIDE,
+                        ocrImageMediaType.toString());
+                ocrParser.parse(is, new EmbeddedContentHandler(new BodyContentHandler(xhtml)),
+                        metadata, context);
+            }
+        } catch (IOException e) {
+            handleCatchableIOE(e);
+        } catch (SAXException e) {
+            throw new IOException("error writing OCR content from PDF", e);
+        }
+    }
+
+    private Path renderPage(TemporaryResources tmpResources) {
         PDFRenderer renderer =
                 config.getOcrRenderingStrategy() == PDFParserConfig.OCR_RENDERING_STRATEGY.NO_TEXT ?
                         new NoTextPDFRenderer(pdDocument) : new PDFRenderer(pdDocument);
@@ -509,17 +530,6 @@ class AbstractPDF2XHTML extends PDFTextStripper {
                         ExceptionUtils.getStackTrace(e));
                 return;
             }
-            try (InputStream is = TikaInputStream.get(tmpFile)) {
-                metadata.set(TikaCoreProperties.CONTENT_TYPE_PARSER_OVERRIDE,
-                        ocrImageMediaType.toString());
-                ocrParser.parse(is, new EmbeddedContentHandler(new BodyContentHandler(xhtml)),
-                        metadata, context);
-            }
-        } catch (IOException e) {
-            handleCatchableIOE(e);
-        } catch (SAXException e) {
-            throw new IOException("error writing OCR content from PDF", e);
-        }
     }
 
     @Override
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/OCR2XHTML.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/OCR2XHTML.java
index 7493253bb..1930a1ae9 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/OCR2XHTML.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/OCR2XHTML.java
@@ -29,6 +29,7 @@ import org.xml.sax.SAXException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.renderer.RenderResults;
 
 
 /**
@@ -38,7 +39,7 @@ import org.apache.tika.parser.ParseContext;
 class OCR2XHTML extends AbstractPDF2XHTML {
 
     private OCR2XHTML(PDDocument document, ContentHandler handler, ParseContext context,
-                      Metadata metadata, PDFParserConfig config) throws IOException {
+                      Metadata metadata, RenderResults renderResults, PDFParserConfig config) throws IOException {
         super(document, handler, context, metadata, config);
     }
 
@@ -53,11 +54,12 @@ class OCR2XHTML extends AbstractPDF2XHTML {
      * @throws TikaException if there was an exception outside of per page processing
      */
     public static void process(PDDocument document, ContentHandler handler, ParseContext context,
-                               Metadata metadata, PDFParserConfig config)
+                               Metadata metadata, RenderResults renderResults,
+                               PDFParserConfig config)
             throws SAXException, TikaException {
         OCR2XHTML ocr2XHTML = null;
         try {
-            ocr2XHTML = new OCR2XHTML(document, handler, context, metadata, config);
+            ocr2XHTML = new OCR2XHTML(document, handler, context, metadata, renderResults, config);
             ocr2XHTML.writeText(document, new Writer() {
                 @Override
                 public void write(char[] cbuf, int off, int len) {
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
index 93d1b7e81..2b4f1368a 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
@@ -40,6 +40,7 @@ import org.xml.sax.SAXException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.renderer.RenderResults;
 
 /**
  * Utility class that overrides the {@link PDFTextStripper} functionality
@@ -64,8 +65,8 @@ class PDF2XHTML extends AbstractPDF2XHTML {
     private AtomicInteger inlineImageCounter = new AtomicInteger(0);
 
+    /**
+     * Package-private constructor; passes every argument, including {@code renderResults},
+     * straight through to the {@link AbstractPDF2XHTML} constructor.
+     */
     PDF2XHTML(PDDocument document, ContentHandler handler, ParseContext context, Metadata metadata,
-              PDFParserConfig config) throws IOException {
-        super(document, handler, context, metadata, config);
+              RenderResults renderResults, PDFParserConfig config) throws IOException {
+        super(document, handler, context, metadata, renderResults, config);
     }
 
     /**
@@ -79,7 +80,8 @@ class PDF2XHTML extends AbstractPDF2XHTML {
      * @throws TikaException if there was an exception outside of per page processing
      */
     public static void process(PDDocument document, ContentHandler handler, ParseContext context,
-                               Metadata metadata, PDFParserConfig config)
+                               Metadata metadata, RenderResults renderResults,
+                               PDFParserConfig config)
             throws SAXException, TikaException {
         PDF2XHTML pdf2XHTML = null;
         try {
@@ -90,7 +92,8 @@ class PDF2XHTML extends AbstractPDF2XHTML {
                 pdf2XHTML =
                         new AngleDetectingPDF2XHTML(document, handler, context, metadata, config);
             } else {
-                pdf2XHTML = new PDF2XHTML(document, handler, context, metadata, config);
+                pdf2XHTML = new PDF2XHTML(document, handler, context, metadata, renderResults,
+                        config);
             }
             config.configure(pdf2XHTML);
 
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
index 3835179b1..79b79f14f 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
@@ -64,6 +64,9 @@ import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.AbstractParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.PasswordProvider;
+import org.apache.tika.parser.RenderingParser;
+import org.apache.tika.renderer.RenderResults;
+import org.apache.tika.renderer.Renderer;
 import org.apache.tika.sax.XHTMLContentHandler;
 
 /**
@@ -96,7 +99,7 @@ import org.apache.tika.sax.XHTMLContentHandler;
  * If your PDFs contain marked content or tags, consider
  * {@link PDFParserConfig#setExtractMarkedContent(boolean)}
  */
-public class PDFParser extends AbstractParser implements Initializable {
+public class PDFParser extends AbstractParser implements RenderingParser, Initializable {
 
     /**
      * Metadata key for giving the document password to the parser.
@@ -149,12 +152,17 @@ public class PDFParser extends AbstractParser implements Initializable {
                 pdfDocument = getPDDocument(new CloseShieldInputStream(stream), password,
                         memoryUsageSetting, metadata, context);
             }
+            tstream.setOpenContainer(pdfDocument);
             metadata.set(PDF.IS_ENCRYPTED, Boolean.toString(pdfDocument.isEncrypted()));
 
             metadata.set(Metadata.CONTENT_TYPE, MEDIA_TYPE.toString());
             extractMetadata(pdfDocument, metadata, context);
             AccessChecker checker = localConfig.getAccessChecker();
             checker.check(metadata);
+            RenderResults renderResults = null;
+            //the renderer is optional: the config's renderer field stays null unless
+            //setRenderer() has been called, so only render when one is actually present
+            Renderer renderer = localConfig.getRenderer();
+            if (renderer != null && renderer.getSupportedTypes(context).contains(MEDIA_TYPE)) {
+                renderResults = renderPDF(tstream, context, localConfig);
+            }
             if (handler != null) {
                 boolean hasXFA = hasXFA(pdfDocument);
                 metadata.set(PDF.HAS_XFA, Boolean.toString(hasXFA));
@@ -166,12 +174,15 @@ public class PDFParser extends AbstractParser implements Initializable {
                     handleXFAOnly(pdfDocument, handler, metadata, context);
                 } else if (localConfig.getOcrStrategy()
                         .equals(PDFParserConfig.OCR_STRATEGY.OCR_ONLY)) {
-                    OCR2XHTML.process(pdfDocument, handler, context, metadata, localConfig);
+                    OCR2XHTML.process(pdfDocument, handler, context, metadata, renderResults,
+                            localConfig);
                 } else if (hasMarkedContent && localConfig.isExtractMarkedContent()) {
                     PDFMarkedContent2XHTML
-                            .process(pdfDocument, handler, context, metadata, localConfig);
+                            .process(pdfDocument, handler, context, metadata, renderResults,
+                                    localConfig);
                 } else {
-                    PDF2XHTML.process(pdfDocument, handler, context, metadata, localConfig);
+                    PDF2XHTML.process(pdfDocument, handler, context, metadata, renderResults,
+                            localConfig);
                 }
             }
         } catch (InvalidPasswordException e) {
@@ -184,6 +195,14 @@ public class PDFParser extends AbstractParser implements Initializable {
         }
     }
 
+    /**
+     * Hands the stream to the renderer held by {@code localConfig}, together with a fresh
+     * {@link Metadata} object whose {@code TikaCoreProperties.TYPE} is set to the PDF media type.
+     * The renderer must be non-null; this method does not check for that itself.
+     *
+     * @param tstream stream to render
+     * @param parseContext parse context passed on to the renderer
+     * @param localConfig config that supplies the renderer
+     * @return whatever the renderer returns
+     * @throws IOException if thrown by the renderer
+     * @throws TikaException if thrown by the renderer
+     */
+    private RenderResults renderPDF(TikaInputStream tstream,
+                                    ParseContext parseContext, PDFParserConfig localConfig)
+            throws IOException, TikaException {
+        Renderer configuredRenderer = localConfig.getRenderer();
+        Metadata renderingMetadata = new Metadata();
+        renderingMetadata.set(TikaCoreProperties.TYPE, MEDIA_TYPE.toString());
+        return configuredRenderer.render(tstream, renderingMetadata, parseContext);
+    }
+
 
     protected PDDocument getPDDocument(InputStream inputStream, String password,
                                        MemoryUsageSetting memoryUsageSetting, Metadata metadata,
@@ -622,6 +641,11 @@ public class PDFParser extends AbstractParser implements Initializable {
         //no-op
     }
 
+    /**
+     * Passes the renderer on to {@code defaultConfig}.
+     * NOTE(review): presumably this implements {@code RenderingParser#setRenderer} -- confirm.
+     *
+     * @param renderer renderer to store on {@code defaultConfig}
+     */
+    @Override
+    public void setRenderer(Renderer renderer) {
+        defaultConfig.setRenderer(renderer);
+    }
+
     /**
      * Copied from AcroformDefaultFixup minus generation of appearances and handling of orphan
      * widgets, which we don't need.
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
index af0e9617b..3b80ac061 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
@@ -29,6 +29,7 @@ import org.apache.pdfbox.rendering.ImageType;
 import org.apache.pdfbox.text.PDFTextStripper;
 
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.renderer.Renderer;
 
 /**
  * Config for PDFParser.
@@ -130,6 +131,8 @@ public class PDFParserConfig implements Serializable {
 
     private boolean detectAngles = false;
 
+    private Renderer renderer;
+
     /**
      * @return whether or not to extract only inline image metadata and not render the images
      */
@@ -857,6 +860,14 @@ public class PDFParserConfig implements Serializable {
         return getMaxMainMemoryBytes() == config.getMaxMainMemoryBytes();
     }
 
+    /**
+     * Sets the renderer returned by {@link #getRenderer()}.
+     *
+     * @param renderer renderer to store; stored as given, no null check is applied
+     */
+    public void setRenderer(Renderer renderer) {
+        this.renderer = renderer;
+    }
+
+    /**
+     * Returns the renderer stored by {@link #setRenderer(Renderer)}.
+     *
+     * @return the stored renderer, or {@code null} if none has been set; the field has no
+     * default value, so callers need to check for {@code null}
+     */
+    public Renderer getRenderer() {
+        return renderer;
+    }
+
     @Override
     public int hashCode() {
         int result = (isEnableAutoSpace() ? 1 : 0);
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/PDFBoxRenderer.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/PDFBoxRenderer.java
new file mode 100644
index 000000000..4f7a1d149
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/PDFBoxRenderer.java
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.renderer.pdf;
+
+import java.awt.image.BufferedImage;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.commons.io.IOExceptionWithCause;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.rendering.ImageType;
+import org.apache.pdfbox.rendering.PDFRenderer;
+import org.apache.pdfbox.tools.imageio.ImageIOUtil;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.extractor.EmbeddedDocumentUtil;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.Rendering;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.renderer.RenderResult;
+import org.apache.tika.renderer.RenderResults;
+import org.apache.tika.renderer.Renderer;
+
+/**
+ * {@link Renderer} backed by PDFBox that renders every page of a PDF to its own image file.
+ */
+public class PDFBoxRenderer implements Renderer {
+
+    //static and final: the supported types are a constant shared by all instances and
+    //should not be reassignable per-instance state
+    private static final Set<MediaType> SUPPORTED_TYPES =
+            Collections.singleton(MediaType.application("pdf"));
+
+    /**
+     * This is the amount of time it takes for PDFBox to render the page
+     */
+    public static final Property PDFBOX_RENDERING_TIME_MS =
+            Property.externalReal(Rendering.RENDERING_PREFIX + "pdfbox-rendering-ms");
+
+    /**
+     * This is the amount of time it takes for PDFBox/java to write the image after
+     * it has been rendered into a BufferedImage.  Some formats take much longer
+     * to encode than others.
+     */
+    public static final Property PDFBOX_IMAGE_WRITING_TIME_MS =
+            Property.externalReal(Rendering.RENDERING_PREFIX + "pdfbox-image-writing-ms");
+
+    //resolution handed to PDFBox for rendering and to the image writer
+    private int dpi = 300;
+    private ImageType imageType = ImageType.GRAY;
+    //used both as the image writer's format name and as the temp file extension
+    private String imageFormatName = "tiff";
+
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Renders every page of the PDF and collects one {@link RenderResult} per page.
+     * <p>
+     * If the stream already carries a {@link PDDocument} as its open container, that
+     * document is rendered and left open for its owner to close; otherwise the document is
+     * loaded from the stream and closed before this method returns.  A page that fails with
+     * an {@link IOException} yields a result with status {@code EXCEPTION} and the exception
+     * recorded in that page's metadata; the remaining pages are still rendered.
+     *
+     * @param is stream containing the PDF
+     * @param metadata not read by this implementation
+     * @param parseContext not read by this implementation
+     * @return one result per page, in page order
+     * @throws IOException if the document cannot be loaded or closed
+     * @throws TikaException declared by {@link Renderer}; not thrown by this implementation
+     */
+    @Override
+    public RenderResults render(InputStream is, Metadata metadata, ParseContext parseContext)
+            throws IOException, TikaException {
+
+        TikaInputStream tis = TikaInputStream.get(is);
+        PDDocument pdDocument;
+        boolean mustClose = false;
+        Object openContainer = tis.getOpenContainer();
+        if (openContainer instanceof PDDocument) {
+            //reuse the document the caller has already parsed; the caller owns it
+            pdDocument = (PDDocument) openContainer;
+        } else {
+            //no usable open container (none, or some other type): load the document here
+            pdDocument = PDDocument.load(tis);
+            mustClose = true;
+        }
+        //NOTE(review): the page images are created directly with Files.createTempFile;
+        //presumably RenderResults/its TemporaryResources is what deletes them later, and
+        //results are not released if a runtime exception escapes the loop -- confirm
+        RenderResults results = new RenderResults(new TemporaryResources());
+        try {
+            PDFRenderer renderer = new PDFRenderer(pdDocument);
+            int numPages = pdDocument.getNumberOfPages();
+            for (int i = 0; i < numPages; i++) {
+                Metadata m = new Metadata();
+                m.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
+                        TikaCoreProperties.EmbeddedResourceType.RENDERING.name());
+                try {
+                    //page numbers in the metadata are 1-based
+                    m.set(Rendering.PAGE_NUMBER, i + 1);
+                    Path imagePath = renderPage(renderer, i, m);
+                    results.add(new RenderResult(RenderResult.STATUS.SUCCESS, imagePath, m));
+                } catch (IOException e) {
+                    //a failure on one page must not prevent rendering of the other pages
+                    EmbeddedDocumentUtil.recordException(e, m);
+                    results.add(new RenderResult(RenderResult.STATUS.EXCEPTION, null, m));
+                }
+            }
+        } finally {
+            if (mustClose) {
+                pdDocument.close();
+            }
+        }
+        return results;
+    }
+
+    /**
+     * Renders a single page into a new temporary image file and records the timings
+     * in the page's metadata.  If rendering or writing fails, the temporary file is
+     * deleted again before the exception is propagated.
+     *
+     * @param renderer PDFBox renderer for the document
+     * @param pageIndex zero-based index of the page
+     * @param metadata metadata of the page; receives the rendering and writing times
+     * @return path of the temporary image file
+     * @throws IOException if the temp file cannot be created, or if rendering or writing
+     * the image fails; runtime exceptions other than {@link SecurityException} are wrapped
+     */
+    private Path renderPage(PDFRenderer renderer, int pageIndex, Metadata metadata)
+            throws IOException {
+
+        Path tmpFile = Files.createTempFile("tika-pdfbox-rendering-",
+                "-" + (pageIndex + 1) + "." + imageFormatName);
+        boolean success = false;
+        try {
+            long start = System.currentTimeMillis();
+            BufferedImage image = renderer.renderImageWithDPI(pageIndex, dpi, imageType);
+            long renderingElapsed = System.currentTimeMillis() - start;
+            metadata.set(PDFBOX_RENDERING_TIME_MS, renderingElapsed);
+            start = System.currentTimeMillis();
+            boolean written;
+            try (OutputStream os = Files.newOutputStream(tmpFile)) {
+                written = ImageIOUtil.writeImage(image, imageFormatName, os, dpi);
+            }
+            if (!written) {
+                //writeImage signals failure through its return value, not an exception;
+                //without this check an unusable file would be reported as a success
+                throw new IOException(
+                        "couldn't write image in format '" + imageFormatName + "'");
+            }
+            long elapsedWrite = System.currentTimeMillis() - start;
+            metadata.set(PDFBOX_IMAGE_WRITING_TIME_MS, elapsedWrite);
+            metadata.set(Rendering.RENDERED_MS, renderingElapsed + elapsedWrite);
+            success = true;
+        } catch (SecurityException e) {
+            //throw SecurityExceptions immediately
+            throw e;
+        } catch (IOException | RuntimeException e) {
+            throw new IOExceptionWithCause(e);
+        } finally {
+            if (!success) {
+                //don't leave a partial or empty temp file behind for a page that failed
+                try {
+                    Files.deleteIfExists(tmpFile);
+                } catch (IOException ignored) {
+                    //best effort; the original failure is the exception worth reporting
+                }
+            }
+        }
+        return tmpFile;
+    }
+
+}