You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by dk...@apache.org on 2019/05/22 15:14:08 UTC

[sling-org-apache-sling-app-cms] branch master updated: Adding support for general thumbnail generation via Tika text extraction and powerpoints via POI

This is an automated email from the ASF dual-hosted git repository.

dklco pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sling-org-apache-sling-app-cms.git


The following commit(s) were added to refs/heads/master by this push:
     new b75e689  Adding support for general thumbnail generation via Tika text extraction and powerpoints via POI
b75e689 is described below

commit b75e6897fbed6d8ba1e8a3ca4df78b6737154481
Author: Dan Klco <dk...@apache.org>
AuthorDate: Wed May 22 11:13:40 2019 -0400

    Adding support for general thumbnail generation via Tika text extraction
    and powerpoints via POI
---
 core/pom.xml                                       |   4 +
 .../transformation/SlideShowThumbnailProvider.java |  87 +++++++++++++++++++++
 .../transformation/TikaFallbackProvider.java       |  85 ++++++++++++++++++++
 .../cms/core/helpers/SlingCMSContextHelper.java    |   3 +
 .../SlideShowThumbnailProviderTest.java            |  77 ++++++++++++++++++
 .../transformation/TikaFallbackProviderTest.java   |  55 +++++++++++++
 core/src/test/resources/Sling.docx                 | Bin 0 -> 15143 bytes
 core/src/test/resources/Sling.ppt                  | Bin 0 -> 123392 bytes
 core/src/test/resources/Sling.pptx                 | Bin 0 -> 99100 bytes
 pom.xml                                            |   6 ++
 10 files changed, 317 insertions(+)

diff --git a/core/pom.xml b/core/pom.xml
index 49e5799..2836a80 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -258,6 +258,10 @@
             <artifactId>poi-ooxml</artifactId>
         </dependency>
         <dependency>
+            <groupId>org.apache.poi</groupId>
+            <artifactId>poi-scratchpad</artifactId>
+        </dependency>
+        <dependency>
             <groupId>org.apache.sling</groupId>
             <artifactId>org.apache.sling.testing.sling-mock.junit4</artifactId>
         </dependency>
diff --git a/core/src/main/java/org/apache/sling/cms/core/internal/transformation/SlideShowThumbnailProvider.java b/core/src/main/java/org/apache/sling/cms/core/internal/transformation/SlideShowThumbnailProvider.java
new file mode 100644
index 0000000..77bffde
--- /dev/null
+++ b/core/src/main/java/org/apache/sling/cms/core/internal/transformation/SlideShowThumbnailProvider.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sling.cms.core.internal.transformation;
+
+import java.awt.Color;
+import java.awt.Dimension;
+import java.awt.Graphics2D;
+import java.awt.geom.Rectangle2D;
+import java.awt.image.BufferedImage;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
+
+import javax.imageio.ImageIO;
+
+import org.apache.commons.io.output.ByteArrayOutputStream;
+import org.apache.poi.hslf.usermodel.HSLFSlideShow;
+import org.apache.poi.sl.usermodel.Slide;
+import org.apache.poi.sl.usermodel.SlideShow;
+import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.sling.cms.File;
+import org.apache.sling.cms.transformation.OutputFileFormat;
+import org.apache.sling.cms.transformation.ThumbnailProvider;
+import org.osgi.service.component.annotations.Component;
+
+import com.google.common.net.MediaType;
+
+/**
+ * Provides thumbnails for Microsoft PowerPoint files by rendering the first
+ * slide of the presentation onto a white canvas and encoding it as PNG. Legacy
+ * binary presentations are opened with {@link HSLFSlideShow}; everything else
+ * this provider is asked to render is opened with {@link XMLSlideShow}.
+ */
+@Component(service = ThumbnailProvider.class)
+public class SlideShowThumbnailProvider implements ThumbnailProvider {
+
+    /**
+     * Checks whether the file's content type is a PowerPoint media type.
+     *
+     * @param file the file to check
+     * @return true if the content type matches PPT or PPTX; false otherwise,
+     *         including when the content type is missing or cannot be parsed
+     */
+    @Override
+    public boolean applies(File file) {
+        MediaType mt = parseMediaType(file);
+        return mt != null && (mt.is(MediaType.MICROSOFT_POWERPOINT) || mt.is(MediaType.OOXML_PRESENTATION));
+    }
+
+    /**
+     * Renders the first slide of the presentation as a PNG image sized to the
+     * presentation's page size. A presentation without slides yields a blank
+     * white image.
+     *
+     * @param file the presentation to render
+     * @return a stream over the PNG-encoded thumbnail
+     * @throws IOException if the content type cannot be determined, the file
+     *                     contents cannot be read, or the presentation cannot be
+     *                     loaded or encoded
+     */
+    @Override
+    public InputStream getThumbnail(File file) throws IOException {
+        MediaType mt = parseMediaType(file);
+        if (mt == null) {
+            throw new IOException("Unable to determine content type of " + file.getPath());
+        }
+        try (InputStream is = file.getResource().adaptTo(InputStream.class)) {
+            // adaptTo may yield null; fail with a descriptive IOException
+            // instead of handing null to the POI constructors
+            if (is == null) {
+                throw new IOException("Unable to read contents of " + file.getPath());
+            }
+            try (SlideShow<?, ?> ppt = openSlideShow(is, mt);
+                    ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
+                Dimension dim = ppt.getPageSize();
+                BufferedImage img = new BufferedImage(dim.width, dim.height, BufferedImage.TYPE_INT_RGB);
+                Graphics2D graphics = img.createGraphics();
+                try {
+                    // paint a white background first; TYPE_INT_RGB images start out black
+                    graphics.setPaint(Color.white);
+                    graphics.fill(new Rectangle2D.Float(0, 0, dim.width, dim.height));
+
+                    List<? extends Slide<?, ?>> slides = ppt.getSlides();
+                    if (slides != null && !slides.isEmpty()) {
+                        slides.get(0).draw(graphics);
+                    }
+                } finally {
+                    // release the graphics context as soon as drawing is done
+                    graphics.dispose();
+                }
+
+                ImageIO.write(img, OutputFileFormat.PNG.toString(), baos);
+                return new ByteArrayInputStream(baos.toByteArray());
+            }
+        }
+    }
+
+    /**
+     * Opens the presentation with the POI implementation matching the media
+     * type: HSLF for legacy PowerPoint, XSLF for anything else.
+     */
+    private static SlideShow<?, ?> openSlideShow(InputStream is, MediaType mt) throws IOException {
+        if (mt.is(MediaType.MICROSOFT_POWERPOINT)) {
+            return new HSLFSlideShow(is);
+        }
+        return new XMLSlideShow(is);
+    }
+
+    /**
+     * Parses the file's content type, returning null when it is absent or not
+     * a syntactically valid media type.
+     */
+    private static MediaType parseMediaType(File file) {
+        String contentType = file.getContentType();
+        if (contentType == null) {
+            return null;
+        }
+        try {
+            return MediaType.parse(contentType);
+        } catch (IllegalArgumentException ignored) {
+            // a malformed content type simply means this provider cannot handle the file
+            return null;
+        }
+    }
+
+}
diff --git a/core/src/main/java/org/apache/sling/cms/core/internal/transformation/TikaFallbackProvider.java b/core/src/main/java/org/apache/sling/cms/core/internal/transformation/TikaFallbackProvider.java
new file mode 100644
index 0000000..0437286
--- /dev/null
+++ b/core/src/main/java/org/apache/sling/cms/core/internal/transformation/TikaFallbackProvider.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sling.cms.core.internal.transformation;
+
+import java.awt.Graphics;
+import java.awt.image.BufferedImage;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import javax.imageio.ImageIO;
+import javax.swing.JEditorPane;
+
+import org.apache.sling.cms.File;
+import org.apache.sling.cms.transformation.OutputFileFormat;
+import org.apache.sling.cms.transformation.ThumbnailProvider;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.osgi.framework.Constants;
+import org.osgi.service.component.annotations.Component;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.SAXException;
+
+/**
+ * Fallback {@link ThumbnailProvider} which applies to every file. It extracts
+ * the body text of the file with Tika's {@link AutoDetectParser} and renders
+ * that text into a fixed-size PNG image. The component is registered with a
+ * service ranking of {@link Integer#MIN_VALUE}.
+ */
+@Component(service = ThumbnailProvider.class, property = { Constants.SERVICE_RANKING + "=" + Integer.MIN_VALUE })
+public class TikaFallbackProvider implements ThumbnailProvider {
+
+    private static final Logger log = LoggerFactory.getLogger(TikaFallbackProvider.class);
+
+    /** Width, in pixels, of the generated thumbnail. */
+    private static final int THUMBNAIL_WIDTH = 500;
+
+    /** Height, in pixels, of the generated thumbnail. */
+    private static final int THUMBNAIL_HEIGHT = 500;
+
+    /**
+     * This provider accepts any file.
+     *
+     * @param file the file to check
+     * @return always true
+     */
+    @Override
+    public boolean applies(File file) {
+        return true;
+    }
+
+    /**
+     * Generates a thumbnail by extracting the text content of the file and
+     * rendering it into a PNG image.
+     *
+     * @param file the file to generate the thumbnail for
+     * @return a stream over the PNG-encoded thumbnail
+     * @throws IOException if the file contents cannot be read, parsed or
+     *                     encoded
+     */
+    @Override
+    public InputStream getThumbnail(File file) throws IOException {
+
+        log.info("Extracting content thumbnail from {}", file.getPath());
+        try {
+            log.debug("Extracting file contents");
+            String text = extractText(file);
+
+            log.debug("Creating thumbnail of file contents");
+            return renderText(text);
+        } catch (SAXException | TikaException e) {
+            throw new IOException("Failed to generate thumbnail from " + file.getPath(), e);
+        }
+    }
+
+    /**
+     * Extracts the body text of the file with Tika, closing the underlying
+     * stream once parsing has finished.
+     */
+    private static String extractText(File file) throws IOException, SAXException, TikaException {
+        try (InputStream is = file.getResource().adaptTo(InputStream.class)) {
+            // adaptTo may yield null; fail with a descriptive IOException
+            // instead of handing null to the parser
+            if (is == null) {
+                throw new IOException("Unable to read contents of " + file.getPath());
+            }
+            Parser parser = new AutoDetectParser();
+            // NOTE(review): the no-arg BodyContentHandler is documented as bounded
+            // (100k characters) and as throwing a SAXException beyond that, so very
+            // large documents would fail here rather than be truncated - confirm
+            // whether a truncating handler is wanted.
+            BodyContentHandler handler = new BodyContentHandler();
+            parser.parse(is, handler, new Metadata(), new ParseContext());
+            return handler.toString();
+        }
+    }
+
+    /**
+     * Paints the text through a {@link JEditorPane} onto a fixed-size image and
+     * encodes the image as PNG.
+     */
+    private static InputStream renderText(String text) throws IOException {
+        BufferedImage image = new BufferedImage(THUMBNAIL_WIDTH, THUMBNAIL_HEIGHT, BufferedImage.TYPE_INT_ARGB);
+        Graphics graphics = image.createGraphics();
+        try {
+            JEditorPane jep = new JEditorPane("text/html", text);
+            jep.setSize(THUMBNAIL_WIDTH, THUMBNAIL_HEIGHT);
+            jep.print(graphics);
+        } finally {
+            // release the graphics context as soon as painting is done
+            graphics.dispose();
+        }
+
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        ImageIO.write(image, OutputFileFormat.PNG.toString(), baos);
+        return new ByteArrayInputStream(baos.toByteArray());
+    }
+
+}
diff --git a/core/src/test/java/org/apache/sling/cms/core/helpers/SlingCMSContextHelper.java b/core/src/test/java/org/apache/sling/cms/core/helpers/SlingCMSContextHelper.java
index aaef06b..c86d0ed 100644
--- a/core/src/test/java/org/apache/sling/cms/core/helpers/SlingCMSContextHelper.java
+++ b/core/src/test/java/org/apache/sling/cms/core/helpers/SlingCMSContextHelper.java
@@ -32,6 +32,9 @@ public class SlingCMSContextHelper {
         context.load().json("/content.json", "/content");
         context.load().binaryResource("/apache.png", "/content/apache/sling-apache-org/index/apache.png/jcr:content");
         context.load().binaryResource("/sling.pdf", "/content/apache/sling-apache-org/index/sling.pdf/jcr:content");
+        context.load().binaryResource("/Sling.docx", "/content/apache/sling-apache-org/index/Sling.docx/jcr:content");
+        context.load().binaryResource("/Sling.pptx", "/content/apache/sling-apache-org/index/Sling.pptx/jcr:content");
+        context.load().binaryResource("/Sling.ppt", "/content/apache/sling-apache-org/index/Sling.ppt/jcr:content");
 
         context.registerAdapter(Resource.class, InputStream.class, new Function<Resource, InputStream>() {
             public InputStream apply(Resource input) {
diff --git a/core/src/test/java/org/apache/sling/cms/core/internal/transformation/SlideShowThumbnailProviderTest.java b/core/src/test/java/org/apache/sling/cms/core/internal/transformation/SlideShowThumbnailProviderTest.java
new file mode 100644
index 0000000..6608354
--- /dev/null
+++ b/core/src/test/java/org/apache/sling/cms/core/internal/transformation/SlideShowThumbnailProviderTest.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sling.cms.core.internal.transformation;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+
+import org.apache.sling.cms.File;
+import org.apache.sling.cms.core.helpers.SlingCMSContextHelper;
+import org.apache.sling.cms.transformation.ThumbnailProvider;
+import org.apache.sling.testing.mock.sling.junit.SlingContext;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class SlideShowThumbnailProviderTest {
+
+    private static final Logger log = LoggerFactory.getLogger(SlideShowThumbnailProviderTest.class);
+
+    @Rule
+    public final SlingContext context = new SlingContext();
+
+    private File docxFile;
+    private File pptFile;
+    private File pptxFile;
+
+    private ThumbnailProvider provider;
+
+    @Before
+    public void init() {
+        SlingCMSContextHelper.initContext(context);
+        docxFile = context.resourceResolver().getResource("/content/apache/sling-apache-org/index/Sling.docx")
+                .adaptTo(File.class);
+        pptxFile = context.resourceResolver().getResource("/content/apache/sling-apache-org/index/Sling.pptx")
+                .adaptTo(File.class);
+        pptFile = context.resourceResolver().getResource("/content/apache/sling-apache-org/index/Sling.ppt")
+                .adaptTo(File.class);
+
+        provider = new SlideShowThumbnailProvider();
+
+    }
+
+    @Test
+    public void testApplies() throws IOException {
+        log.info("testApplies");
+        assertTrue(provider.applies(pptxFile));
+        assertTrue(provider.applies(pptFile));
+        assertFalse(provider.applies(docxFile));
+    }
+
+    @Test
+    public void testGetThumbnail() throws IOException {
+        log.info("testGetThumbnail");
+        assertNotNull(provider.getThumbnail(pptxFile));
+        assertNotNull(provider.getThumbnail(pptFile));
+    }
+
+}
diff --git a/core/src/test/java/org/apache/sling/cms/core/internal/transformation/TikaFallbackProviderTest.java b/core/src/test/java/org/apache/sling/cms/core/internal/transformation/TikaFallbackProviderTest.java
new file mode 100644
index 0000000..4b17b49
--- /dev/null
+++ b/core/src/test/java/org/apache/sling/cms/core/internal/transformation/TikaFallbackProviderTest.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sling.cms.core.internal.transformation;
+
+import static org.junit.Assert.assertNotNull;
+
+import java.io.IOException;
+
+import org.apache.sling.cms.File;
+import org.apache.sling.cms.core.helpers.SlingCMSContextHelper;
+import org.apache.sling.testing.mock.sling.junit.SlingContext;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Exercises {@link TikaFallbackProvider} against the DOCX sample file loaded
+ * into the Sling mock context.
+ */
+public class TikaFallbackProviderTest {
+
+    private static final Logger log = LoggerFactory.getLogger(TikaFallbackProviderTest.class);
+
+    /** Repository path of the Word sample document. */
+    private static final String DOCX_PATH = "/content/apache/sling-apache-org/index/Sling.docx";
+
+    @Rule
+    public final SlingContext context = new SlingContext();
+
+    private File wordDocument;
+
+    /** Loads the sample content and adapts the Word sample to a {@link File}. */
+    @Before
+    public void init() {
+        SlingCMSContextHelper.initContext(context);
+        wordDocument = context.resourceResolver().getResource(DOCX_PATH).adaptTo(File.class);
+    }
+
+    /** The fallback provider returns a thumbnail stream for the Word document. */
+    @Test
+    public void testTikaProvider() throws IOException {
+        log.info("testTikaProvider");
+        TikaFallbackProvider fallbackProvider = new TikaFallbackProvider();
+        assertNotNull(fallbackProvider.getThumbnail(wordDocument));
+    }
+
+}
diff --git a/core/src/test/resources/Sling.docx b/core/src/test/resources/Sling.docx
new file mode 100644
index 0000000..73fba2a
Binary files /dev/null and b/core/src/test/resources/Sling.docx differ
diff --git a/core/src/test/resources/Sling.ppt b/core/src/test/resources/Sling.ppt
new file mode 100644
index 0000000..0e9a6d3
Binary files /dev/null and b/core/src/test/resources/Sling.ppt differ
diff --git a/core/src/test/resources/Sling.pptx b/core/src/test/resources/Sling.pptx
new file mode 100644
index 0000000..8ed0a34
Binary files /dev/null and b/core/src/test/resources/Sling.pptx differ
diff --git a/pom.xml b/pom.xml
index 05fcc66..131a5c7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -258,6 +258,12 @@
                 <scope>provided</scope>
             </dependency>
             <dependency>
+                <groupId>org.apache.poi</groupId>
+                <artifactId>poi-scratchpad</artifactId>
+                <version>4.0.1</version>
+                <scope>provided</scope>
+            </dependency>
+            <dependency>
                 <groupId>org.apache.sling</groupId>
                 <artifactId>org.apache.sling.testing.sling-mock.junit4</artifactId>
                 <version>2.3.0</version>