You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/04/26 21:51:02 UTC

[tika] branch TIKA-3571 created (now c3261d6d0)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch TIKA-3571
in repository https://gitbox.apache.org/repos/asf/tika.git


      at c3261d6d0 TIKA-3571 -- WIP, checkpoint commit. Do not merge.

This branch includes the following new commits:

     new c3261d6d0 TIKA-3571 -- WIP, checkpoint commit. Do not merge.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[tika] 01/01: TIKA-3571 -- WIP, checkpoint commit. Do not merge.

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-3571
in repository https://gitbox.apache.org/repos/asf/tika.git

commit c3261d6d0b272f452b1343e2491da7b75adb64db
Author: tallison <ta...@apache.org>
AuthorDate: Tue Apr 26 17:50:44 2022 -0400

    TIKA-3571 -- WIP, checkpoint commit. Do not merge.
---
 .../java/org/apache/tika/config/TikaConfig.java    | 130 +++++++++++++++++--
 .../java/org/apache/tika/metadata/Rendering.java   |  27 ++++
 .../apache/tika/metadata/TikaCoreProperties.java   |   5 +-
 .../java/org/apache/tika/parser/DefaultParser.java |  34 ++++-
 .../org/apache/tika/parser/RenderingParser.java    |  24 ++++
 .../apache/tika/renderer/CompositeRenderer.java    |  99 +++++++++++++++
 .../org/apache/tika/renderer/RenderResult.java     |  52 ++++++++
 .../org/apache/tika/renderer/RenderResults.java    |  53 ++++++++
 .../java/org/apache/tika/renderer/Renderer.java    |  49 +++++++
 .../apache/tika/parser/pdf/AbstractPDF2XHTML.java  |  34 +++--
 .../java/org/apache/tika/parser/pdf/OCR2XHTML.java |   8 +-
 .../java/org/apache/tika/parser/pdf/PDF2XHTML.java |  11 +-
 .../java/org/apache/tika/parser/pdf/PDFParser.java |  32 ++++-
 .../apache/tika/parser/pdf/PDFParserConfig.java    |  11 ++
 .../apache/tika/renderer/pdf/PDFBoxRenderer.java   | 141 +++++++++++++++++++++
 15 files changed, 668 insertions(+), 42 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index 1606262f6..d16f6f171 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@ -74,7 +74,10 @@ import org.apache.tika.parser.CompositeParser;
 import org.apache.tika.parser.DefaultParser;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.ParserDecorator;
+import org.apache.tika.parser.RenderingParser;
 import org.apache.tika.parser.multiple.AbstractMultipleParser;
+import org.apache.tika.renderer.CompositeRenderer;
+import org.apache.tika.renderer.Renderer;
 import org.apache.tika.utils.AnnotationUtils;
 import org.apache.tika.utils.XMLReaderUtils;
 
@@ -95,6 +98,7 @@ public class TikaConfig {
     private final MimeTypes mimeTypes;
     private final ExecutorService executorService;
     private final EncodingDetector encodingDetector;
+    private final Renderer renderer;
     private final MetadataFilter metadataFilter;
     private final AutoDetectParserConfig autoDetectParserConfig;
 
@@ -155,12 +159,14 @@ public class TikaConfig {
         TranslatorXmlLoader translatorLoader = new TranslatorXmlLoader();
         ExecutorServiceXmlLoader executorLoader = new ExecutorServiceXmlLoader();
         EncodingDetectorXmlLoader encodingDetectorXmlLoader = new EncodingDetectorXmlLoader();
+        RendererXmlLoader rendererXmlLoader = new RendererXmlLoader();
         updateXMLReaderUtils(element);
         this.mimeTypes = typesFromDomElement(element);
         this.detector = detectorLoader.loadOverall(element, mimeTypes, loader);
         this.encodingDetector = encodingDetectorXmlLoader.loadOverall(element, mimeTypes, loader);
+        this.renderer = rendererXmlLoader.loadOverall(element, mimeTypes, loader);
 
-        ParserXmlLoader parserLoader = new ParserXmlLoader(encodingDetector);
+        ParserXmlLoader parserLoader = new ParserXmlLoader(encodingDetector, renderer);
         this.parser = parserLoader.loadOverall(element, mimeTypes, loader);
         this.translator = translatorLoader.loadOverall(element, mimeTypes, loader);
         this.executorService = executorLoader.loadOverall(element, mimeTypes, loader);
@@ -187,7 +193,8 @@ public class TikaConfig {
         this.mimeTypes = getDefaultMimeTypes(loader);
         this.detector = getDefaultDetector(mimeTypes, serviceLoader);
         this.encodingDetector = getDefaultEncodingDetector(serviceLoader);
-        this.parser = getDefaultParser(mimeTypes, serviceLoader, encodingDetector);
+        this.renderer = getDefaultRenderer(serviceLoader);
+        this.parser = getDefaultParser(mimeTypes, serviceLoader, encodingDetector, renderer);
         this.translator = getDefaultTranslator(serviceLoader);
         this.executorService = getDefaultExecutorService();
         this.metadataFilter = new NoOpFilter();
@@ -223,7 +230,8 @@ public class TikaConfig {
             this.serviceLoader = new ServiceLoader();
             this.mimeTypes = getDefaultMimeTypes(getContextClassLoader());
             this.encodingDetector = getDefaultEncodingDetector(serviceLoader);
-            this.parser = getDefaultParser(mimeTypes, serviceLoader, encodingDetector);
+            this.renderer = getDefaultRenderer(serviceLoader);
+            this.parser = getDefaultParser(mimeTypes, serviceLoader, encodingDetector, renderer);
             this.detector = getDefaultDetector(mimeTypes, serviceLoader);
             this.translator = getDefaultTranslator(serviceLoader);
             this.executorService = getDefaultExecutorService();
@@ -237,6 +245,7 @@ public class TikaConfig {
                 serviceLoader = serviceLoaderFromDomElement(element, tmpServiceLoader.getLoader());
                 DetectorXmlLoader detectorLoader = new DetectorXmlLoader();
                 EncodingDetectorXmlLoader encodingDetectorLoader = new EncodingDetectorXmlLoader();
+                RendererXmlLoader rendererLoader = new RendererXmlLoader();
                 TranslatorXmlLoader translatorLoader = new TranslatorXmlLoader();
                 ExecutorServiceXmlLoader executorLoader = new ExecutorServiceXmlLoader();
 
@@ -244,8 +253,9 @@ public class TikaConfig {
                 this.encodingDetector =
                         encodingDetectorLoader.loadOverall(element, mimeTypes, serviceLoader);
 
+                this.renderer = rendererLoader.loadOverall(element, mimeTypes, serviceLoader);
 
-                ParserXmlLoader parserLoader = new ParserXmlLoader(encodingDetector);
+                ParserXmlLoader parserLoader = new ParserXmlLoader(encodingDetector, renderer);
                 this.parser = parserLoader.loadOverall(element, mimeTypes, serviceLoader);
                 this.detector = detectorLoader.loadOverall(element, mimeTypes, serviceLoader);
                 this.translator = translatorLoader.loadOverall(element, mimeTypes, serviceLoader);
@@ -273,9 +283,12 @@ public class TikaConfig {
         return new DefaultEncodingDetector(loader);
     }
 
+    protected static CompositeRenderer getDefaultRenderer(ServiceLoader loader) {
+        return new CompositeRenderer(loader);
+    }
     private static CompositeParser getDefaultParser(MimeTypes types, ServiceLoader loader,
-                                                    EncodingDetector encodingDetector) {
-        return new DefaultParser(types.getMediaTypeRegistry(), loader, encodingDetector);
+                                                    EncodingDetector encodingDetector, Renderer renderer) {
+        return new DefaultParser(types.getMediaTypeRegistry(), loader, encodingDetector, renderer);
     }
 
     private static Translator getDefaultTranslator(ServiceLoader loader) {
@@ -811,9 +824,11 @@ public class TikaConfig {
     private static class ParserXmlLoader extends XmlLoader<CompositeParser, Parser> {
 
         private final EncodingDetector encodingDetector;
+        private final Renderer renderer;
 
-        private ParserXmlLoader(EncodingDetector encodingDetector) {
+        private ParserXmlLoader(EncodingDetector encodingDetector, Renderer renderer) {
             this.encodingDetector = encodingDetector;
+            this.renderer = renderer;
         }
 
         boolean supportsComposite() {
@@ -860,7 +875,7 @@ public class TikaConfig {
 
         @Override
         CompositeParser createDefault(MimeTypes mimeTypes, ServiceLoader loader) {
-            return getDefaultParser(mimeTypes, loader, encodingDetector);
+            return getDefaultParser(mimeTypes, loader, encodingDetector, renderer);
         }
 
         @Override
@@ -948,12 +963,18 @@ public class TikaConfig {
         Parser newInstance(Class<? extends Parser> loadedClass)
                 throws IllegalAccessException, InstantiationException, NoSuchMethodException,
                 InvocationTargetException {
+            Parser parser = null;
             if (AbstractEncodingDetectorParser.class.isAssignableFrom(loadedClass)) {
                 Constructor ctor = loadedClass.getConstructor(EncodingDetector.class);
-                return (Parser) ctor.newInstance(encodingDetector);
+                parser = (Parser) ctor.newInstance(encodingDetector);
             } else {
-                return loadedClass.newInstance();
+                parser = loadedClass.newInstance();
+            }
+
+            if (parser instanceof RenderingParser) {
+                ((RenderingParser)parser).setRenderer(renderer);
             }
+            return parser;
         }
 
         @Override
@@ -1306,7 +1327,7 @@ public class TikaConfig {
                     c = encodingDetectorClass.getConstructor(List.class);
                     encodingDetector = c.newInstance(childEncodingDetectors);
                 } catch (NoSuchMethodException me) {
-                    LOG.debug("couldn't find constructor for EncodingDetecto(List) for {}",
+                    LOG.debug("couldn't find constructor for EncodingDetector(List) for {}",
                             encodingDetectorClass);
                 }
             }
@@ -1320,4 +1341,91 @@ public class TikaConfig {
         }
     }
 
+    private static class RendererXmlLoader
+            extends XmlLoader<Renderer, Renderer> {
+
+        boolean supportsComposite() {
+            return true;
+        }
+
+        String getParentTagName() {
+            return "renderers";
+        }
+
+        String getLoaderTagName() {
+            return "renderer";
+        }
+
+        @Override
+        Class<? extends Renderer> getLoaderClass() {
+            return Renderer.class;
+        }
+
+
+        @Override
+        boolean isComposite(Renderer loaded) {
+            return loaded instanceof CompositeRenderer;
+        }
+
+        @Override
+        boolean isComposite(Class<? extends Renderer> loadedClass) {
+            return CompositeRenderer.class.isAssignableFrom(loadedClass);
+        }
+
+        @Override
+        Renderer preLoadOne(Class<? extends Renderer> loadedClass, String classname,
+                                    MimeTypes mimeTypes) throws TikaException {
+            // No renderer classes are currently excluded from config,
+            // so continue with normal loading
+            return null;
+        }
+
+        @Override
+        Renderer createDefault(MimeTypes mimeTypes, ServiceLoader loader) {
+            return getDefaultRenderer(loader);
+        }
+
+        @Override
+        Renderer createComposite(List<Renderer> renderers,
+                                                  MimeTypes mimeTypes, ServiceLoader loader) {
+            return new CompositeRenderer(renderers);
+        }
+
+        @Override
+        Renderer createComposite(Class<? extends Renderer> rendererClass,
+                                         List<Renderer> childRenderers,
+                                         Set<Class<? extends Renderer>> excludeRenderers,
+                                         Map<String, Param> params, MimeTypes mimeTypes,
+                                         ServiceLoader loader)
+                throws InvocationTargetException, IllegalAccessException, InstantiationException {
+            Renderer renderer = null;
+            Constructor<? extends Renderer> c;
+
+            // Try the possible default and composite renderer constructors
+            if (renderer == null) {
+                try {
+                    c = rendererClass.getConstructor(ServiceLoader.class, Collection.class);
+                    renderer = c.newInstance(loader, excludeRenderers);
+                } catch (NoSuchMethodException me) {
+                    LOG.debug("couldn't find constructor for service loader + collection for {}",
+                            rendererClass);
+                }
+            }
+            if (renderer == null) {
+                try {
+                    c = rendererClass.getConstructor(List.class);
+                    renderer = c.newInstance(childRenderers);
+                } catch (NoSuchMethodException me) {
+                    LOG.debug("couldn't find constructor for Renderer(List) for {}",
+                            rendererClass);
+                }
+            }
+            return renderer;
+        }
+
+        @Override
+        Renderer decorate(Renderer created, Element element) {
+            return created; // No decoration of Renderers
+        }
+    }
 }
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/Rendering.java b/tika-core/src/main/java/org/apache/tika/metadata/Rendering.java
new file mode 100644
index 000000000..1ff521aa7
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/metadata/Rendering.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package org.apache.tika.metadata;
+
+public interface Rendering {
+    String RENDERING_PREFIX = "rendering:";
+
+    Property PAGE_NUMBER = Property.externalInteger(RENDERING_PREFIX + "page_number");
+    Property RENDERED_BY = Property.externalTextBag(RENDERING_PREFIX + "Rendered-By");
+    Property RENDERED_MS = Property.externalInteger(RENDERING_PREFIX + "rendering-time-ms");
+}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
index ba138c54f..21581a482 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
@@ -277,7 +277,7 @@ public interface TikaCoreProperties {
     Property EMBEDDED_RESOURCE_TYPE = Property.internalClosedChoise(EMBEDDED_RESOURCE_TYPE_KEY,
             EmbeddedResourceType.ATTACHMENT.toString(), EmbeddedResourceType.INLINE.toString(),
             EmbeddedResourceType.METADATA.toString(), EmbeddedResourceType.MACRO.toString(),
-            EmbeddedResourceType.THUMBNAIL.toString());
+            EmbeddedResourceType.THUMBNAIL.toString(), EmbeddedResourceType.RENDERING.toString());
     Property HAS_SIGNATURE = Property.internalBoolean("hasSignature");
 
 
@@ -302,6 +302,7 @@ public interface TikaCoreProperties {
         MACRO, //any code that is intended to be run by the application
         METADATA, //e.g. xmp, xfa
         FONT,//embedded font files
-        THUMBNAIL//TODO: set this in parsers that handle thumbnails
+        THUMBNAIL, //TODO: set this in parsers that handle thumbnails
+        RENDERING //if a file has been rendered
     }
 }
diff --git a/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java b/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java
index 2abeeed52..336adee93 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/DefaultParser.java
@@ -27,6 +27,8 @@ import org.apache.tika.detect.DefaultEncodingDetector;
 import org.apache.tika.detect.EncodingDetector;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MediaTypeRegistry;
+import org.apache.tika.renderer.CompositeRenderer;
+import org.apache.tika.renderer.Renderer;
 import org.apache.tika.utils.ServiceLoaderUtils;
 
 /**
@@ -46,25 +48,27 @@ public class DefaultParser extends CompositeParser {
 
     public DefaultParser(MediaTypeRegistry registry, ServiceLoader loader,
                          Collection<Class<? extends Parser>> excludeParsers,
-                         EncodingDetector encodingDetector) {
-        super(registry, getDefaultParsers(loader, encodingDetector, excludeParsers));
+                         EncodingDetector encodingDetector, Renderer renderer) {
+        super(registry, getDefaultParsers(loader, encodingDetector, renderer, excludeParsers));
         this.loader = loader;
     }
 
     public DefaultParser(MediaTypeRegistry registry, ServiceLoader loader,
                          Collection<Class<? extends Parser>> excludeParsers) {
         super(registry,
-                getDefaultParsers(loader, new DefaultEncodingDetector(loader), excludeParsers));
+                getDefaultParsers(loader, new DefaultEncodingDetector(loader),
+                        new CompositeRenderer(loader), excludeParsers));
         this.loader = loader;
     }
 
     public DefaultParser(MediaTypeRegistry registry, ServiceLoader loader,
-                         EncodingDetector encodingDetector) {
-        this(registry, loader, Collections.EMPTY_SET, encodingDetector);
+                         EncodingDetector encodingDetector, Renderer renderer) {
+        this(registry, loader, Collections.EMPTY_SET, encodingDetector, renderer);
     }
 
     public DefaultParser(MediaTypeRegistry registry, ServiceLoader loader) {
-        this(registry, loader, Collections.EMPTY_SET, new DefaultEncodingDetector(loader));
+        this(registry, loader, Collections.EMPTY_SET, new DefaultEncodingDetector(loader),
+                new CompositeRenderer(loader));
     }
 
     public DefaultParser(MediaTypeRegistry registry, ClassLoader loader) {
@@ -94,6 +98,7 @@ public class DefaultParser extends CompositeParser {
      */
     private static List<Parser> getDefaultParsers(ServiceLoader loader,
                                                   EncodingDetector encodingDetector,
+                                                  Renderer renderer,
                                                   Collection<Class<? extends Parser>>
                                                           excludeParsers) {
         List<Parser> parsers =
@@ -104,6 +109,11 @@ public class DefaultParser extends CompositeParser {
                 setEncodingDetector(p, encodingDetector);
             }
         }
+        if (renderer != null) {
+            for (Parser p : parsers) {
+                setRenderer(p, renderer);
+            }
+        }
         ServiceLoaderUtils.sortLoadedClasses(parsers);
         return parsers;
     }
@@ -122,6 +132,18 @@ public class DefaultParser extends CompositeParser {
         }
     }
 
+    private static void setRenderer(Parser p, Renderer renderer) {
+        if (p instanceof RenderingParser) {
+            ((RenderingParser) p).setRenderer(renderer);
+        } else if (p instanceof CompositeParser) {
+            for (Parser child : ((CompositeParser) p).getAllComponentParsers()) {
+                setRenderer(child, renderer);
+            }
+        } else if (p instanceof ParserDecorator) {
+            setRenderer(((ParserDecorator) p).getWrappedParser(), renderer);
+        }
+    }
+
     @Override
     public Map<MediaType, Parser> getParsers(ParseContext context) {
         Map<MediaType, Parser> map = super.getParsers(context);
diff --git a/tika-core/src/main/java/org/apache/tika/parser/RenderingParser.java b/tika-core/src/main/java/org/apache/tika/parser/RenderingParser.java
new file mode 100644
index 000000000..0daae6be1
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/parser/RenderingParser.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */package org.apache.tika.parser;
+
+import org.apache.tika.renderer.Renderer;
+
+public interface RenderingParser {
+
+    void setRenderer(Renderer renderer);
+
+}
diff --git a/tika-core/src/main/java/org/apache/tika/renderer/CompositeRenderer.java b/tika-core/src/main/java/org/apache/tika/renderer/CompositeRenderer.java
new file mode 100644
index 000000000..ff5a52061
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/renderer/CompositeRenderer.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */package org.apache.tika.renderer;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.tika.config.Initializable;
+import org.apache.tika.config.InitializableProblemHandler;
+import org.apache.tika.config.Param;
+import org.apache.tika.config.ServiceLoader;
+import org.apache.tika.exception.TikaConfigException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.utils.ServiceLoaderUtils;
+
+public class CompositeRenderer implements Renderer, Initializable {
+
+    private Map<MediaType, Renderer> rendererMap = new HashMap<>();
+    private List<Renderer> renderers;
+
+    public CompositeRenderer(ServiceLoader serviceLoader) {
+        this(getDefaultRenderers(serviceLoader));
+    }
+
+    public CompositeRenderer(List<Renderer> renderers) {
+        this.renderers = renderers;
+    }
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return rendererMap.keySet();
+    }
+
+    @Override
+    public RenderResults render(InputStream is, Metadata metadata, ParseContext parseContext)
+            throws IOException, TikaException {
+        String mediaTypeString = metadata.get(TikaCoreProperties.TYPE);
+        if (mediaTypeString == null) {
+            throw new TikaException("need to specify file type in metadata");
+        }
+        MediaType mt = MediaType.parse(mediaTypeString);
+        if (mt == null) {
+            throw new TikaException("can't parse mediaType: " + mediaTypeString);
+        }
+        Renderer renderer = rendererMap.get(mt);
+        if (renderer == null) {
+            throw new TikaException("I regret I can't find a renderer for " + mt);
+        }
+        return renderer.render(is, metadata, parseContext);
+    }
+
+    @Override
+    public void initialize(Map<String, Param> params) throws TikaConfigException {
+        Map<MediaType, Renderer> tmp = new ConcurrentHashMap<>();
+        ParseContext empty = new ParseContext();
+        for (Renderer renderer : renderers) {
+            for (MediaType mt : renderer.getSupportedTypes(empty)) {
+                tmp.put(mt, renderer);
+            }
+        }
+        rendererMap = Collections.unmodifiableMap(tmp);
+    }
+
+    @Override
+    public void checkInitialization(InitializableProblemHandler problemHandler)
+            throws TikaConfigException {
+
+    }
+
+    private static List<Renderer> getDefaultRenderers(ServiceLoader loader) {
+        List<Renderer> staticRenderers =
+                loader.loadStaticServiceProviders(Renderer.class);
+
+        ServiceLoaderUtils.sortLoadedClasses(staticRenderers);
+        return staticRenderers;
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/renderer/RenderResult.java b/tika-core/src/main/java/org/apache/tika/renderer/RenderResult.java
new file mode 100644
index 000000000..9ed61e342
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/renderer/RenderResult.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.renderer;
+
+import java.nio.file.Path;
+
+import org.apache.tika.metadata.Metadata;
+
+public class RenderResult {
+
+    public enum STATUS {
+        SUCCESS,
+        EXCEPTION,
+        TIMEOUT
+    }
+    private final STATUS status;
+
+    private final Path path;
+    private final Metadata metadata;
+
+    public RenderResult(STATUS status, Path path, Metadata metadata) {
+        this.status = status;
+        this.path = path;
+        this.metadata = metadata;
+    }
+
+    public Path getPath() {
+        return path;
+    }
+
+    public Metadata getMetadata() {
+        return metadata;
+    }
+
+    public STATUS getStatus() {
+        return status;
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/renderer/RenderResults.java b/tika-core/src/main/java/org/apache/tika/renderer/RenderResults.java
new file mode 100644
index 000000000..12d60d3da
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/renderer/RenderResults.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.renderer;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.tika.io.TemporaryResources;
+
+public class RenderResults implements Closeable {
+
+    private List<RenderResult> results = new ArrayList<>();
+
+    private final TemporaryResources tmp;
+    public RenderResults(TemporaryResources tmp) {
+        this.tmp = tmp;
+    }
+    public void add(RenderResult result) {
+        tmp.addResource(new Closeable() {
+            @Override
+            public void close() throws IOException {
+                Files.delete(result.getPath());
+            }
+        });
+        results.add(result);
+    }
+
+    public List<RenderResult> getResults() {
+        return results;
+    }
+
+    @Override
+    public void close() throws IOException {
+        tmp.close();
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/renderer/Renderer.java b/tika-core/src/main/java/org/apache/tika/renderer/Renderer.java
new file mode 100644
index 000000000..5f7cb536b
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/renderer/Renderer.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.renderer;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Serializable;
+import java.util.Set;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+
+/**
+ * Interface for a renderer.  This should be flexible enough to run on the initial design: PDF pages
+ * but also on portions of PDF pages as well as on other document types.
+ *
+ */
+public interface Renderer extends Serializable {
+
+
+    /**
+     * Returns the set of media types supported by this renderer when used
+     * with the given parse context.
+     *
+     * @param context parse context
+     * @return immutable set of media types
+     * @since Apache Tika 2.5.0
+     */
+    Set<MediaType> getSupportedTypes(ParseContext context);
+
+    RenderResults render(InputStream is, Metadata metadata, ParseContext parseContext) throws IOException,
+            TikaException;
+}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
index 93dfbd119..02449d53d 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
@@ -109,6 +109,7 @@ import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
+import org.apache.tika.renderer.RenderResults;
 import org.apache.tika.sax.BodyContentHandler;
 import org.apache.tika.sax.EmbeddedContentHandler;
 import org.apache.tika.sax.XHTMLContentHandler;
@@ -140,6 +141,8 @@ class AbstractPDF2XHTML extends PDFTextStripper {
     final EmbeddedDocumentExtractor embeddedDocumentExtractor;
     final PDFParserConfig config;
     final Parser ocrParser;
+
+    final RenderResults renderResults;
     /**
      * Format used for signature dates
      * TODO Make this thread-safe
@@ -157,11 +160,12 @@ class AbstractPDF2XHTML extends PDFTextStripper {
     int totalCharsPerPage = 0;
 
     AbstractPDF2XHTML(PDDocument pdDocument, ContentHandler handler, ParseContext context,
-                      Metadata metadata, PDFParserConfig config) throws IOException {
+                      Metadata metadata, RenderResults renderResults, PDFParserConfig config) throws IOException {
         this.pdDocument = pdDocument;
         this.xhtml = new XHTMLContentHandler(handler, metadata);
         this.context = context;
         this.metadata = metadata;
+        this.renderResults = renderResults;
         this.config = config;
         embeddedDocumentExtractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context);
         if (config.getOcrStrategy() == NO_OCR) {
@@ -482,6 +486,23 @@ class AbstractPDF2XHTML extends PDFTextStripper {
             }
         }
 
+        try (TemporaryResources tmp = new TemporaryResources()) {
+            Path tmpFile = renderPage(tmp);
+
+            try (InputStream is = TikaInputStream.get(tmpFile)) {
+                metadata.set(TikaCoreProperties.CONTENT_TYPE_PARSER_OVERRIDE,
+                        ocrImageMediaType.toString());
+                ocrParser.parse(is, new EmbeddedContentHandler(new BodyContentHandler(xhtml)),
+                        metadata, context);
+            }
+        } catch (IOException e) {
+            handleCatchableIOE(e);
+        } catch (SAXException e) {
+            throw new IOException("error writing OCR content from PDF", e);
+        }
+    }
+
+    private Path renderPage(TemporaryResources tmpResources) {
         PDFRenderer renderer =
                 config.getOcrRenderingStrategy() == PDFParserConfig.OCR_RENDERING_STRATEGY.NO_TEXT ?
                         new NoTextPDFRenderer(pdDocument) : new PDFRenderer(pdDocument);
@@ -509,17 +530,6 @@ class AbstractPDF2XHTML extends PDFTextStripper {
                         ExceptionUtils.getStackTrace(e));
                 return;
             }
-            try (InputStream is = TikaInputStream.get(tmpFile)) {
-                metadata.set(TikaCoreProperties.CONTENT_TYPE_PARSER_OVERRIDE,
-                        ocrImageMediaType.toString());
-                ocrParser.parse(is, new EmbeddedContentHandler(new BodyContentHandler(xhtml)),
-                        metadata, context);
-            }
-        } catch (IOException e) {
-            handleCatchableIOE(e);
-        } catch (SAXException e) {
-            throw new IOException("error writing OCR content from PDF", e);
-        }
     }
 
     @Override
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/OCR2XHTML.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/OCR2XHTML.java
index 7493253bb..1930a1ae9 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/OCR2XHTML.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/OCR2XHTML.java
@@ -29,6 +29,7 @@ import org.xml.sax.SAXException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.renderer.RenderResults;
 
 
 /**
@@ -38,7 +39,7 @@ import org.apache.tika.parser.ParseContext;
 class OCR2XHTML extends AbstractPDF2XHTML {
 
     private OCR2XHTML(PDDocument document, ContentHandler handler, ParseContext context,
-                      Metadata metadata, PDFParserConfig config) throws IOException {
-        super(document, handler, context, metadata, config);
+                      Metadata metadata, RenderResults renderResults, PDFParserConfig config) throws IOException {
+        super(document, handler, context, metadata, renderResults, config);
     }
 
@@ -53,11 +54,12 @@ class OCR2XHTML extends AbstractPDF2XHTML {
      * @throws TikaException if there was an exception outside of per page processing
      */
     public static void process(PDDocument document, ContentHandler handler, ParseContext context,
-                               Metadata metadata, PDFParserConfig config)
+                               Metadata metadata, RenderResults renderResults,
+                               PDFParserConfig config)
             throws SAXException, TikaException {
         OCR2XHTML ocr2XHTML = null;
         try {
-            ocr2XHTML = new OCR2XHTML(document, handler, context, metadata, config);
+            ocr2XHTML = new OCR2XHTML(document, handler, context, metadata, renderResults, config);
             ocr2XHTML.writeText(document, new Writer() {
                 @Override
                 public void write(char[] cbuf, int off, int len) {
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
index 93d1b7e81..2b4f1368a 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
@@ -40,6 +40,7 @@ import org.xml.sax.SAXException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.renderer.RenderResults;
 
 /**
  * Utility class that overrides the {@link PDFTextStripper} functionality
@@ -64,8 +65,8 @@ class PDF2XHTML extends AbstractPDF2XHTML {
     private AtomicInteger inlineImageCounter = new AtomicInteger(0);
 
     PDF2XHTML(PDDocument document, ContentHandler handler, ParseContext context, Metadata metadata,
-              PDFParserConfig config) throws IOException {
-        super(document, handler, context, metadata, config);
+              RenderResults renderResults, PDFParserConfig config) throws IOException {
+        super(document, handler, context, metadata, renderResults, config);
     }
 
     /**
@@ -79,7 +80,8 @@ class PDF2XHTML extends AbstractPDF2XHTML {
      * @throws TikaException if there was an exception outside of per page processing
      */
     public static void process(PDDocument document, ContentHandler handler, ParseContext context,
-                               Metadata metadata, PDFParserConfig config)
+                               Metadata metadata, RenderResults renderResults,
+                               PDFParserConfig config)
             throws SAXException, TikaException {
         PDF2XHTML pdf2XHTML = null;
         try {
@@ -90,7 +92,8 @@ class PDF2XHTML extends AbstractPDF2XHTML {
                 pdf2XHTML =
                         new AngleDetectingPDF2XHTML(document, handler, context, metadata, config);
             } else {
-                pdf2XHTML = new PDF2XHTML(document, handler, context, metadata, config);
+                pdf2XHTML = new PDF2XHTML(document, handler, context, metadata, renderResults,
+                        config);
             }
             config.configure(pdf2XHTML);
 
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
index 3835179b1..79b79f14f 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
@@ -64,6 +64,9 @@ import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.AbstractParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.PasswordProvider;
+import org.apache.tika.parser.RenderingParser;
+import org.apache.tika.renderer.RenderResults;
+import org.apache.tika.renderer.Renderer;
 import org.apache.tika.sax.XHTMLContentHandler;
 
 /**
@@ -96,7 +99,7 @@ import org.apache.tika.sax.XHTMLContentHandler;
  * If your PDFs contain marked content or tags, consider
  * {@link PDFParserConfig#setExtractMarkedContent(boolean)}
  */
-public class PDFParser extends AbstractParser implements Initializable {
+public class PDFParser extends AbstractParser implements RenderingParser, Initializable {
 
     /**
      * Metadata key for giving the document password to the parser.
@@ -149,12 +152,17 @@ public class PDFParser extends AbstractParser implements Initializable {
                 pdfDocument = getPDDocument(new CloseShieldInputStream(stream), password,
                         memoryUsageSetting, metadata, context);
             }
+            tstream.setOpenContainer(pdfDocument);
             metadata.set(PDF.IS_ENCRYPTED, Boolean.toString(pdfDocument.isEncrypted()));
 
             metadata.set(Metadata.CONTENT_TYPE, MEDIA_TYPE.toString());
             extractMetadata(pdfDocument, metadata, context);
             AccessChecker checker = localConfig.getAccessChecker();
             checker.check(metadata);
+            Renderer renderer = localConfig.getRenderer();
+            RenderResults renderResults =
+                    renderer != null && renderer.getSupportedTypes(context).contains(MEDIA_TYPE) ?
+                            renderPDF(tstream, context, localConfig) : null;
             if (handler != null) {
                 boolean hasXFA = hasXFA(pdfDocument);
                 metadata.set(PDF.HAS_XFA, Boolean.toString(hasXFA));
@@ -166,12 +174,15 @@ public class PDFParser extends AbstractParser implements Initializable {
                     handleXFAOnly(pdfDocument, handler, metadata, context);
                 } else if (localConfig.getOcrStrategy()
                         .equals(PDFParserConfig.OCR_STRATEGY.OCR_ONLY)) {
-                    OCR2XHTML.process(pdfDocument, handler, context, metadata, localConfig);
+                    OCR2XHTML.process(pdfDocument, handler, context, metadata, renderResults,
+                            localConfig);
                 } else if (hasMarkedContent && localConfig.isExtractMarkedContent()) {
                     PDFMarkedContent2XHTML
-                            .process(pdfDocument, handler, context, metadata, localConfig);
+                            .process(pdfDocument, handler, context, metadata, renderResults,
+                                    localConfig);
                 } else {
-                    PDF2XHTML.process(pdfDocument, handler, context, metadata, localConfig);
+                    PDF2XHTML.process(pdfDocument, handler, context, metadata, renderResults,
+                            localConfig);
                 }
             }
         } catch (InvalidPasswordException e) {
@@ -184,6 +195,14 @@ public class PDFParser extends AbstractParser implements Initializable {
         }
     }
 
+    /** Runs the configured renderer on {@code tstream}, passing metadata typed as MEDIA_TYPE. */
+    private RenderResults renderPDF(TikaInputStream tstream, ParseContext parseContext,
+            PDFParserConfig localConfig) throws IOException, TikaException {
+        Metadata renderMetadata = new Metadata();
+        renderMetadata.set(TikaCoreProperties.TYPE, MEDIA_TYPE.toString());
+        return localConfig.getRenderer().render(tstream, renderMetadata, parseContext);
+    }
+
 
     protected PDDocument getPDDocument(InputStream inputStream, String password,
                                        MemoryUsageSetting memoryUsageSetting, Metadata metadata,
@@ -622,6 +641,11 @@ public class PDFParser extends AbstractParser implements Initializable {
         //no-op
     }
 
+    @Override
+    public void setRenderer(Renderer renderer) {
+        defaultConfig.setRenderer(renderer);
+    }
+
     /**
      * Copied from AcroformDefaultFixup minus generation of appearances and handling of orphan
      * widgets, which we don't need.
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
index af0e9617b..3b80ac061 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
@@ -29,6 +29,7 @@ import org.apache.pdfbox.rendering.ImageType;
 import org.apache.pdfbox.text.PDFTextStripper;
 
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.renderer.Renderer;
 
 /**
  * Config for PDFParser.
@@ -130,6 +131,8 @@ public class PDFParserConfig implements Serializable {
 
     private boolean detectAngles = false;
 
+    private Renderer renderer;
+
     /**
      * @return whether or not to extract only inline image metadata and not render the images
      */
@@ -857,6 +860,14 @@ public class PDFParserConfig implements Serializable {
         return getMaxMainMemoryBytes() == config.getMaxMainMemoryBytes();
     }
 
+    public void setRenderer(Renderer renderer) {
+        this.renderer = renderer;
+    }
+
+    public Renderer getRenderer() {
+        return renderer;
+    }
+
     @Override
     public int hashCode() {
         int result = (isEnableAutoSpace() ? 1 : 0);
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/PDFBoxRenderer.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/PDFBoxRenderer.java
new file mode 100644
index 000000000..4f7a1d149
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/PDFBoxRenderer.java
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.renderer.pdf;
+
+import java.awt.image.BufferedImage;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.commons.io.IOExceptionWithCause;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.rendering.ImageType;
+import org.apache.pdfbox.rendering.PDFRenderer;
+import org.apache.pdfbox.tools.imageio.ImageIOUtil;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.extractor.EmbeddedDocumentUtil;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.Rendering;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.renderer.RenderResult;
+import org.apache.tika.renderer.RenderResults;
+import org.apache.tika.renderer.Renderer;
+
+/**
+ * {@link Renderer} that uses PDFBox to render each page of a PDF to its own
+ * temporary image file.
+ */
+public class PDFBoxRenderer implements Renderer {
+
+    private static final Set<MediaType> SUPPORTED_TYPES =
+            Collections.singleton(MediaType.application("pdf"));
+
+    /**
+     * This is the amount of time it takes for PDFBox to render the page
+     */
+    public static final Property PDFBOX_RENDERING_TIME_MS =
+            Property.externalReal(Rendering.RENDERING_PREFIX + "pdfbox-rendering-ms");
+
+    /**
+     * This is the amount of time it takes for PDFBox/java to write the image after
+     * it has been rendered into a BufferedImage.  Some formats take much longer
+     * to encode than others.
+     */
+    public static final Property PDFBOX_IMAGE_WRITING_TIME_MS =
+            Property.externalReal(Rendering.RENDERING_PREFIX + "pdfbox-image-writing-ms");
+
+    private int dpi = 300;
+    private ImageType imageType = ImageType.GRAY;
+    private String imageFormatName = "tiff";
+
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Renders each page of the PDF to an image file and collects one
+     * {@link RenderResult} per page.  If the stream already carries an open
+     * {@link PDDocument} as its open container, that document is reused and left
+     * open; otherwise the PDF is loaded from the stream and closed before
+     * returning.  A page that fails with an {@link IOException} is recorded as an
+     * {@code EXCEPTION} result and rendering continues with the next page.
+     *
+     * @param is stream containing the PDF
+     * @param metadata not read by this implementation
+     * @param parseContext not read by this implementation
+     * @return the per-page results, in page order
+     * @throws IOException if the PDF cannot be loaded or closed
+     * @throws TikaException declared by {@link Renderer}
+     */
+    @Override
+    public RenderResults render(InputStream is, Metadata metadata, ParseContext parseContext)
+            throws IOException, TikaException {
+        TikaInputStream tis = TikaInputStream.get(is);
+        // Reuse the open container only if it really is a PDDocument; a blind cast
+        // would throw a ClassCastException for any other container type.
+        Object openContainer = tis.getOpenContainer();
+        boolean mustClose = !(openContainer instanceof PDDocument);
+        PDDocument pdDocument = mustClose ? PDDocument.load(tis) : (PDDocument) openContainer;
+        RenderResults results = new RenderResults(new TemporaryResources());
+        try {
+            PDFRenderer renderer = new PDFRenderer(pdDocument);
+            for (int i = 0; i < pdDocument.getNumberOfPages(); i++) {
+                Metadata m = new Metadata();
+                m.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
+                        TikaCoreProperties.EmbeddedResourceType.RENDERING.name());
+                try {
+                    m.set(Rendering.PAGE_NUMBER, i + 1);
+                    Path imagePath = renderPage(renderer, i, m);
+                    results.add(new RenderResult(RenderResult.STATUS.SUCCESS, imagePath, m));
+                } catch (IOException e) {
+                    EmbeddedDocumentUtil.recordException(e, m);
+                    results.add(new RenderResult(RenderResult.STATUS.EXCEPTION, null, m));
+                }
+            }
+        } finally {
+            if (mustClose) {
+                pdDocument.close();
+            }
+        }
+        return results;
+    }
+
+    /**
+     * Renders a single page to a newly created temporary image file and records
+     * the rendering and image-writing times in {@code metadata}.  The temporary
+     * file is deleted again if the page cannot be rendered or written.
+     *
+     * @param renderer renderer for the document
+     * @param pageIndex zero-based index of the page to render
+     * @param metadata receives the timing properties
+     * @return path of the image file
+     * @throws IOException if rendering or writing fails; runtime exceptions other
+     *                     than {@link SecurityException} are wrapped as well
+     */
+    private Path renderPage(PDFRenderer renderer, int pageIndex, Metadata metadata)
+            throws IOException {
+        Path tmpFile = Files.createTempFile("tika-pdfbox-rendering-",
+                "-" + (pageIndex + 1) + "." + imageFormatName);
+        boolean success = false;
+        try {
+            long start = System.currentTimeMillis();
+            BufferedImage image = renderer.renderImageWithDPI(pageIndex, dpi, imageType);
+            long renderingElapsed = System.currentTimeMillis() - start;
+            metadata.set(PDFBOX_RENDERING_TIME_MS, renderingElapsed);
+            start = System.currentTimeMillis();
+            try (OutputStream os = Files.newOutputStream(tmpFile)) {
+                // writeImage reports an unwritable format by returning false
+                if (!ImageIOUtil.writeImage(image, imageFormatName, os, dpi)) {
+                    throw new IOException("couldn't write image as " + imageFormatName);
+                }
+            }
+            long elapsedWrite = System.currentTimeMillis() - start;
+            metadata.set(PDFBOX_IMAGE_WRITING_TIME_MS, elapsedWrite);
+            metadata.set(Rendering.RENDERED_MS, renderingElapsed + elapsedWrite);
+            success = true;
+        } catch (SecurityException e) {
+            //throw SecurityExceptions immediately
+            throw e;
+        } catch (IOException | RuntimeException e) {
+            throw new IOExceptionWithCause(e);
+        } finally {
+            if (!success) {
+                deleteQuietly(tmpFile);
+            }
+        }
+        return tmpFile;
+    }
+
+    /**
+     * Best-effort removal of a temporary file that is not returned to the caller.
+     */
+    private static void deleteQuietly(Path path) {
+        try {
+            Files.deleteIfExists(path);
+        } catch (IOException | RuntimeException ignored) {
+            //best effort only; the rendering failure must not be masked
+        }
+    }
+}