You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by dk...@apache.org on 2019/05/22 15:14:08 UTC
[sling-org-apache-sling-app-cms] branch master updated: Adding
support for general thumbnail generation via Tika text extraction and
powerpoints via POI
This is an automated email from the ASF dual-hosted git repository.
dklco pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sling-org-apache-sling-app-cms.git
The following commit(s) were added to refs/heads/master by this push:
new b75e689 Adding support for general thumbnail generation via Tika text extraction and powerpoints via POI
b75e689 is described below
commit b75e6897fbed6d8ba1e8a3ca4df78b6737154481
Author: Dan Klco <dk...@apache.org>
AuthorDate: Wed May 22 11:13:40 2019 -0400
Adding support for general thumbnail generation via Tika text extraction
and powerpoints via POI
---
core/pom.xml | 4 +
.../transformation/SlideShowThumbnailProvider.java | 87 +++++++++++++++++++++
.../transformation/TikaFallbackProvider.java | 85 ++++++++++++++++++++
.../cms/core/helpers/SlingCMSContextHelper.java | 3 +
.../SlideShowThumbnailProviderTest.java | 77 ++++++++++++++++++
.../transformation/TikaFallbackProviderTest.java | 55 +++++++++++++
core/src/test/resources/Sling.docx | Bin 0 -> 15143 bytes
core/src/test/resources/Sling.ppt | Bin 0 -> 123392 bytes
core/src/test/resources/Sling.pptx | Bin 0 -> 99100 bytes
pom.xml | 6 ++
10 files changed, 317 insertions(+)
diff --git a/core/pom.xml b/core/pom.xml
index 49e5799..2836a80 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -258,6 +258,10 @@
<artifactId>poi-ooxml</artifactId>
</dependency>
<dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi-scratchpad</artifactId>
+ </dependency>
+ <dependency>
<groupId>org.apache.sling</groupId>
<artifactId>org.apache.sling.testing.sling-mock.junit4</artifactId>
</dependency>
diff --git a/core/src/main/java/org/apache/sling/cms/core/internal/transformation/SlideShowThumbnailProvider.java b/core/src/main/java/org/apache/sling/cms/core/internal/transformation/SlideShowThumbnailProvider.java
new file mode 100644
index 0000000..77bffde
--- /dev/null
+++ b/core/src/main/java/org/apache/sling/cms/core/internal/transformation/SlideShowThumbnailProvider.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sling.cms.core.internal.transformation;
+
+import java.awt.Color;
+import java.awt.Dimension;
+import java.awt.Graphics2D;
+import java.awt.geom.Rectangle2D;
+import java.awt.image.BufferedImage;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
+
+import javax.imageio.ImageIO;
+
+import org.apache.commons.io.output.ByteArrayOutputStream;
+import org.apache.poi.hslf.usermodel.HSLFSlideShow;
+import org.apache.poi.sl.usermodel.Slide;
+import org.apache.poi.sl.usermodel.SlideShow;
+import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.sling.cms.File;
+import org.apache.sling.cms.transformation.OutputFileFormat;
+import org.apache.sling.cms.transformation.ThumbnailProvider;
+import org.osgi.service.component.annotations.Component;
+
+import com.google.common.net.MediaType;
+
+/**
+ * Provides Thumbnails for Microsoft PPT and PPTX files.
+ * <p>
+ * The thumbnail is an image of the first slide of the presentation, drawn on a
+ * white background at the presentation's page size.
+ */
+@Component(service = ThumbnailProvider.class)
+public class SlideShowThumbnailProvider implements ThumbnailProvider {
+
+    /**
+     * Checks whether the file's content type is a PowerPoint (PPT) or OOXML
+     * presentation (PPTX) media type.
+     *
+     * @param file the file to check
+     * @return true if the content type is one of the supported presentation
+     *         types, false otherwise, including when the content type is
+     *         missing or cannot be parsed
+     */
+    @Override
+    public boolean applies(File file) {
+        String contentType = file.getContentType();
+        if (contentType == null) {
+            return false;
+        }
+        try {
+            MediaType mt = MediaType.parse(contentType);
+            return mt.is(MediaType.MICROSOFT_POWERPOINT) || mt.is(MediaType.OOXML_PRESENTATION);
+        } catch (IllegalArgumentException e) {
+            // MediaType.parse rejects malformed content types; such a file is
+            // not a presentation this provider can handle
+            return false;
+        }
+    }
+
+    /**
+     * Renders the first slide of the presentation into an in-memory image.
+     *
+     * @param file the presentation to create the thumbnail for
+     * @return a stream over the encoded image bytes
+     * @throws IOException if the file contents cannot be read, the presentation
+     *                     cannot be loaded or the image cannot be written
+     */
+    @Override
+    public InputStream getThumbnail(File file) throws IOException {
+        MediaType mt = MediaType.parse(file.getContentType());
+        try (InputStream is = file.getResource().adaptTo(InputStream.class)) {
+            if (is == null) {
+                // adaptTo returns null when the resource cannot be adapted; fail
+                // with a descriptive IOException instead of inside POI
+                throw new IOException("Unable to read the contents of " + file.getPath());
+            }
+            try (SlideShow<?, ?> ppt = openSlideShow(mt, is)) {
+                return renderFirstSlide(ppt);
+            }
+        }
+    }
+
+    /**
+     * Loads the stream with the binary PowerPoint reader when the media type is
+     * MICROSOFT_POWERPOINT and with the OOXML reader otherwise.
+     */
+    private static SlideShow<?, ?> openSlideShow(MediaType mt, InputStream is) throws IOException {
+        if (mt.is(MediaType.MICROSOFT_POWERPOINT)) {
+            return new HSLFSlideShow(is);
+        }
+        return new XMLSlideShow(is);
+    }
+
+    /**
+     * Draws the first slide (if there is one) onto a white canvas of the
+     * presentation's page size and encodes the canvas.
+     */
+    private static InputStream renderFirstSlide(SlideShow<?, ?> ppt) throws IOException {
+        Dimension dim = ppt.getPageSize();
+        BufferedImage img = new BufferedImage(dim.width, dim.height, BufferedImage.TYPE_INT_RGB);
+
+        Graphics2D graphics = img.createGraphics();
+        try {
+            graphics.setPaint(Color.white);
+            graphics.fill(new Rectangle2D.Float(0, 0, dim.width, dim.height));
+
+            List<? extends Slide<?, ?>> slides = ppt.getSlides();
+            if (slides != null && !slides.isEmpty()) {
+                slides.get(0).draw(graphics);
+            }
+        } finally {
+            // release the native resources held by the graphics context
+            graphics.dispose();
+        }
+
+        try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
+            ImageIO.write(img, OutputFileFormat.PNG.toString(), baos);
+            return new ByteArrayInputStream(baos.toByteArray());
+        }
+    }
+
+}
diff --git a/core/src/main/java/org/apache/sling/cms/core/internal/transformation/TikaFallbackProvider.java b/core/src/main/java/org/apache/sling/cms/core/internal/transformation/TikaFallbackProvider.java
new file mode 100644
index 0000000..0437286
--- /dev/null
+++ b/core/src/main/java/org/apache/sling/cms/core/internal/transformation/TikaFallbackProvider.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sling.cms.core.internal.transformation;
+
+import java.awt.Graphics;
+import java.awt.image.BufferedImage;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import javax.imageio.ImageIO;
+import javax.swing.JEditorPane;
+
+import org.apache.sling.cms.File;
+import org.apache.sling.cms.transformation.OutputFileFormat;
+import org.apache.sling.cms.transformation.ThumbnailProvider;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.osgi.framework.Constants;
+import org.osgi.service.component.annotations.Component;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.SAXException;
+
+/**
+ * Thumbnail provider which applies to every file: it extracts the text content
+ * of the file with Tika and renders that text into an image.
+ * <p>
+ * The component is registered with the lowest possible service ranking.
+ * NOTE(review): presumably so that more specific providers are consulted
+ * first - confirm against the code selecting the provider.
+ */
+@Component(service = ThumbnailProvider.class, property = { Constants.SERVICE_RANKING + "=" + Integer.MIN_VALUE })
+public class TikaFallbackProvider implements ThumbnailProvider {
+
+    private static final Logger log = LoggerFactory.getLogger(TikaFallbackProvider.class);
+
+    /** Width of the generated thumbnail in pixels. */
+    private static final int THUMBNAIL_WIDTH = 500;
+
+    /** Height of the generated thumbnail in pixels. */
+    private static final int THUMBNAIL_HEIGHT = 500;
+
+    /**
+     * Always returns true, this provider accepts every file.
+     *
+     * @param file the file to check
+     * @return true
+     */
+    @Override
+    public boolean applies(File file) {
+        return true;
+    }
+
+    /**
+     * Extracts the text of the file and renders it into an in-memory image.
+     *
+     * @param file the file to create the thumbnail for
+     * @return a stream over the encoded image bytes
+     * @throws IOException if the file contents cannot be read or parsed or the
+     *                     image cannot be written
+     */
+    @Override
+    public InputStream getThumbnail(File file) throws IOException {
+
+        log.info("Extracting content thumbnail from {}", file.getPath());
+        String contents = extractContents(file);
+
+        log.debug("Creating thumbnail of file contents");
+        BufferedImage image = renderContents(contents);
+
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        ImageIO.write(image, OutputFileFormat.PNG.toString(), baos);
+        return new ByteArrayInputStream(baos.toByteArray());
+    }
+
+    /**
+     * Parses the file with Tika's auto-detecting parser and returns the text
+     * collected by the body content handler. The stream is closed once parsing
+     * finishes or fails.
+     */
+    private static String extractContents(File file) throws IOException {
+        log.debug("Extracting file contents");
+        try (InputStream is = file.getResource().adaptTo(InputStream.class)) {
+            if (is == null) {
+                // adaptTo returns null when the resource cannot be adapted
+                throw new IOException("Unable to read the contents of " + file.getPath());
+            }
+            Parser parser = new AutoDetectParser();
+            // NOTE(review): the no-arg BodyContentHandler is documented to cap the
+            // amount of collected text; confirm how very large documents behave
+            BodyContentHandler handler = new BodyContentHandler();
+            parser.parse(is, handler, new Metadata(), new ParseContext());
+            return handler.toString();
+        } catch (SAXException | TikaException e) {
+            throw new IOException("Failed to generate thumbnail from " + file.getPath(), e);
+        }
+    }
+
+    /**
+     * Prints the contents through an editor pane onto a new image of the
+     * thumbnail size.
+     */
+    private static BufferedImage renderContents(String contents) {
+        BufferedImage image = new BufferedImage(THUMBNAIL_WIDTH, THUMBNAIL_HEIGHT, BufferedImage.TYPE_INT_ARGB);
+        Graphics graphics = image.createGraphics();
+        try {
+            JEditorPane jep = new JEditorPane("text/html", contents);
+            jep.setSize(THUMBNAIL_WIDTH, THUMBNAIL_HEIGHT);
+            jep.print(graphics);
+        } finally {
+            // release the native resources held by the graphics context
+            graphics.dispose();
+        }
+        return image;
+    }
+
+}
diff --git a/core/src/test/java/org/apache/sling/cms/core/helpers/SlingCMSContextHelper.java b/core/src/test/java/org/apache/sling/cms/core/helpers/SlingCMSContextHelper.java
index aaef06b..c86d0ed 100644
--- a/core/src/test/java/org/apache/sling/cms/core/helpers/SlingCMSContextHelper.java
+++ b/core/src/test/java/org/apache/sling/cms/core/helpers/SlingCMSContextHelper.java
@@ -32,6 +32,9 @@ public class SlingCMSContextHelper {
context.load().json("/content.json", "/content");
context.load().binaryResource("/apache.png", "/content/apache/sling-apache-org/index/apache.png/jcr:content");
context.load().binaryResource("/sling.pdf", "/content/apache/sling-apache-org/index/sling.pdf/jcr:content");
+ context.load().binaryResource("/Sling.docx", "/content/apache/sling-apache-org/index/Sling.docx/jcr:content");
+ context.load().binaryResource("/Sling.pptx", "/content/apache/sling-apache-org/index/Sling.pptx/jcr:content");
+ context.load().binaryResource("/Sling.ppt", "/content/apache/sling-apache-org/index/Sling.ppt/jcr:content");
context.registerAdapter(Resource.class, InputStream.class, new Function<Resource, InputStream>() {
public InputStream apply(Resource input) {
diff --git a/core/src/test/java/org/apache/sling/cms/core/internal/transformation/SlideShowThumbnailProviderTest.java b/core/src/test/java/org/apache/sling/cms/core/internal/transformation/SlideShowThumbnailProviderTest.java
new file mode 100644
index 0000000..6608354
--- /dev/null
+++ b/core/src/test/java/org/apache/sling/cms/core/internal/transformation/SlideShowThumbnailProviderTest.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sling.cms.core.internal.transformation;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+
+import org.apache.sling.cms.File;
+import org.apache.sling.cms.core.helpers.SlingCMSContextHelper;
+import org.apache.sling.cms.transformation.ThumbnailProvider;
+import org.apache.sling.testing.mock.sling.junit.SlingContext;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Exercises {@link SlideShowThumbnailProvider} against the sample DOCX, PPTX
+ * and PPT files loaded into the mock Sling context.
+ */
+public class SlideShowThumbnailProviderTest {
+
+    private static final Logger log = LoggerFactory.getLogger(SlideShowThumbnailProviderTest.class);
+
+    @Rule
+    public final SlingContext context = new SlingContext();
+
+    private File docxFile;
+    private File pptFile;
+    private File pptxFile;
+
+    private ThumbnailProvider provider;
+
+    /**
+     * Initializes the context, adapts the three sample resources to files and
+     * creates the provider under test.
+     */
+    @Before
+    public void init() {
+        SlingCMSContextHelper.initContext(context);
+
+        docxFile = fileAt("/content/apache/sling-apache-org/index/Sling.docx");
+        pptxFile = fileAt("/content/apache/sling-apache-org/index/Sling.pptx");
+        pptFile = fileAt("/content/apache/sling-apache-org/index/Sling.ppt");
+
+        provider = new SlideShowThumbnailProvider();
+    }
+
+    /**
+     * The provider must accept both presentation formats and reject the DOCX.
+     */
+    @Test
+    public void testApplies() throws IOException {
+        log.info("testApplies");
+        assertTrue(provider.applies(pptxFile));
+        assertTrue(provider.applies(pptFile));
+        assertFalse(provider.applies(docxFile));
+    }
+
+    /**
+     * A thumbnail stream must be returned for both presentation formats.
+     */
+    @Test
+    public void testGetThumbnail() throws IOException {
+        log.info("testGetThumbnail");
+        assertNotNull(provider.getThumbnail(pptxFile));
+        assertNotNull(provider.getThumbnail(pptFile));
+    }
+
+    /**
+     * Resolves the resource at the path and adapts it to a file.
+     */
+    private File fileAt(String path) {
+        return context.resourceResolver().getResource(path).adaptTo(File.class);
+    }
+
+}
diff --git a/core/src/test/java/org/apache/sling/cms/core/internal/transformation/TikaFallbackProviderTest.java b/core/src/test/java/org/apache/sling/cms/core/internal/transformation/TikaFallbackProviderTest.java
new file mode 100644
index 0000000..4b17b49
--- /dev/null
+++ b/core/src/test/java/org/apache/sling/cms/core/internal/transformation/TikaFallbackProviderTest.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sling.cms.core.internal.transformation;
+
+import static org.junit.Assert.assertNotNull;
+
+import java.io.IOException;
+
+import org.apache.sling.cms.File;
+import org.apache.sling.cms.core.helpers.SlingCMSContextHelper;
+import org.apache.sling.testing.mock.sling.junit.SlingContext;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Exercises {@link TikaFallbackProvider} against the sample DOCX file loaded
+ * into the mock Sling context.
+ */
+public class TikaFallbackProviderTest {
+
+    private static final Logger log = LoggerFactory.getLogger(TikaFallbackProviderTest.class);
+
+    @Rule
+    public final SlingContext context = new SlingContext();
+
+    private File docxFile;
+
+    /**
+     * Initializes the context and adapts the sample DOCX resource to a file.
+     */
+    @Before
+    public void init() {
+        SlingCMSContextHelper.initContext(context);
+        String docxPath = "/content/apache/sling-apache-org/index/Sling.docx";
+        docxFile = context.resourceResolver().getResource(docxPath).adaptTo(File.class);
+    }
+
+    /**
+     * A thumbnail stream must be returned for the DOCX file.
+     */
+    @Test
+    public void testTikaProvider() throws IOException {
+        log.info("testTikaProvider");
+        TikaFallbackProvider fallbackProvider = new TikaFallbackProvider();
+        assertNotNull(fallbackProvider.getThumbnail(docxFile));
+    }
+
+}
diff --git a/core/src/test/resources/Sling.docx b/core/src/test/resources/Sling.docx
new file mode 100644
index 0000000..73fba2a
Binary files /dev/null and b/core/src/test/resources/Sling.docx differ
diff --git a/core/src/test/resources/Sling.ppt b/core/src/test/resources/Sling.ppt
new file mode 100644
index 0000000..0e9a6d3
Binary files /dev/null and b/core/src/test/resources/Sling.ppt differ
diff --git a/core/src/test/resources/Sling.pptx b/core/src/test/resources/Sling.pptx
new file mode 100644
index 0000000..8ed0a34
Binary files /dev/null and b/core/src/test/resources/Sling.pptx differ
diff --git a/pom.xml b/pom.xml
index 05fcc66..131a5c7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -258,6 +258,12 @@
<scope>provided</scope>
</dependency>
<dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi-scratchpad</artifactId>
+ <version>4.0.1</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
<groupId>org.apache.sling</groupId>
<artifactId>org.apache.sling.testing.sling-mock.junit4</artifactId>
<version>2.3.0</version>