You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by dk...@apache.org on 2019/05/15 20:51:33 UTC

[sling-org-apache-sling-app-cms] 06/07: Updated to fix issues with and cleanup the File Metadata Extractor and adding a servlet for generating thumbnails

This is an automated email from the ASF dual-hosted git repository.

dklco pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sling-org-apache-sling-app-cms.git

commit b6fc5024ee941b83248cbce75404d2f632e540db
Author: Dan Klco <dk...@apache.org>
AuthorDate: Wed May 15 09:11:03 2019 -0400

    Updated to fix issues with and cleanup the File Metadata Extractor
    and adding a servlet for generating thumbnails
---
 .../java/org/apache/sling/cms/CMSConstants.java    |   7 +-
 .../apache/sling/cms/FileMetadataExtractor.java    |  59 +++++++
 builder/pom.xml                                    |   1 -
 builder/src/main/provisioning/cms.txt              |  30 +++-
 builder/src/main/provisioning/sling.txt            |   1 -
 core/pom.xml                                       |  17 ++
 .../core/internal/FileMetadataExtractorImpl.java   | 178 +++++++++++++++++++++
 .../internal/jobs/FileMetadataExtractorJob.java    |   9 +-
 .../internal/listeners/FileMetadataExtractor.java  | 155 ------------------
 .../listeners/FileMetadataExtractorListener.java   |  90 +++++++++++
 .../sling/cms/core/internal/models/FileImpl.java   |   4 +-
 .../core/internal/servlets/TransformServlet.java   |  82 ++++++++++
 .../internal/FileMetadataExtractorImplTest.java    |  62 +++++++
 core/src/test/resources/apache.png                 | Bin 0 -> 12022 bytes
 metadata-extractor/pom.xml                         | 102 ++++++++++++
 pom.xml                                            |  13 ++
 ...eUserMapperImpl.amended-sling-cms-metadata.json |   6 +
 17 files changed, 644 insertions(+), 172 deletions(-)

diff --git a/api/src/main/java/org/apache/sling/cms/CMSConstants.java b/api/src/main/java/org/apache/sling/cms/CMSConstants.java
index d74e6e7..58416f7 100644
--- a/api/src/main/java/org/apache/sling/cms/CMSConstants.java
+++ b/api/src/main/java/org/apache/sling/cms/CMSConstants.java
@@ -60,6 +60,11 @@ public class CMSConstants {
     public static final String NAMESPACE = "sling";
 
     /**
+     * The subpath for the metadata under sling:File resources
+     */
+    public static final String NN_METADATA = "metadata";
+
+    /**
      * Component node type.
      */
     public static final String NT_COMPONENT = NAMESPACE + ":Component";
@@ -129,7 +134,7 @@ public class CMSConstants {
      */
     public static final String[] PUBLISHABLE_TYPES = new String[] { CMSConstants.NT_FILE, CMSConstants.NT_PAGE,
             JcrResourceConstants.NT_SLING_FOLDER, JcrResourceConstants.NT_SLING_ORDERED_FOLDER };
-
+    
     /**
      * The name of the admin user
      */
diff --git a/api/src/main/java/org/apache/sling/cms/FileMetadataExtractor.java b/api/src/main/java/org/apache/sling/cms/FileMetadataExtractor.java
new file mode 100644
index 0000000..54a709e
--- /dev/null
+++ b/api/src/main/java/org/apache/sling/cms/FileMetadataExtractor.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sling.cms;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.osgi.annotation.versioning.ProviderType;
+
+/**
+ * Service for extracting metadata from a file
+ */
+@ProviderType
+public interface FileMetadataExtractor {
+
+    /**
+     * Extract the metadata from the specified file and return the resulting
+     * metadata
+     * 
+     * @param file the file to extract the metadata from
+     * @return the metadata from the file
+     * @throws IOException if the file's metadata cannot be read or parsed
+     */
+    Map<String,Object> extractMetadata(File file) throws IOException;
+
+    /**
+     * Extract the metadata from the specified file and persist the results under
+     * the jcr:content/metadata node of the file resource
+     * 
+     * @param file the file to extract the metadata from
+     * @throws IOException
+     */
+    void updateMetadata(File file) throws IOException;
+
+    /**
+     * Extract the metadata from the specified file and persist the results under
+     * the jcr:content/metadata node of the file resource
+     * 
+     * @param file the file to extract the metadata from
+     * @param persist if true, persist the results; if false, leave the changes
+     *                unpersisted
+     * @throws IOException
+     */
+    void updateMetadata(File file, boolean persist) throws IOException;
+}
diff --git a/builder/pom.xml b/builder/pom.xml
index 7541f92..d469f15 100644
--- a/builder/pom.xml
+++ b/builder/pom.xml
@@ -153,7 +153,6 @@
         <dependency>
             <groupId>org.apache.httpcomponents</groupId>
             <artifactId>httpclient</artifactId>
-            <version>4.5.2</version>
             <scope>test</scope>
         </dependency>
         <dependency>
diff --git a/builder/src/main/provisioning/cms.txt b/builder/src/main/provisioning/cms.txt
index 4b8f6d1..b0b7960 100644
--- a/builder/src/main/provisioning/cms.txt
+++ b/builder/src/main/provisioning/cms.txt
@@ -17,18 +17,26 @@
 #  under the License.
 [feature name=cms]
 
-[artifacts startLevel=20]
+[artifacts startLevel=15]
     org.apache.cocoon/cocoon-serializers-charsets/1.0.0
     org.jsoup/jsoup/1.11.3
-    org.jsoup/jsoup/1.11.3
     com.vladsch.flexmark/flexmark-osgi/0.34.22
     org.apache.sling/org.apache.sling.fileoptim/0.9.2
     org.apache.sling/org.apache.sling.resourcemerger/1.3.6
+    org.apache.servicemix.bundles/org.apache.servicemix.bundles.xmlbeans/3.0.2_1
+    org.apache.commons/commons-math3/3.6.1
+    org.apache.commons/commons-compress/1.18
+
+[artifacts startLevel=20]
+    org.apache.servicemix.bundles/org.apache.servicemix.bundles.poi/4.0.1_2
     org.apache.sling/org.apache.sling.cms.api/${cms.version}
     org.apache.sling/org.apache.sling.cms.core/${cms.version}
     org.apache.sling/org.apache.sling.cms.ui/${cms.version}
     org.apache.sling/org.apache.sling.cms.reference/${cms.version}
     org.apache.tika/tika-bundle/1.19.1
+
+[artifacts startLevel=25]
+    org.apache.sling/org.apache.sling.cms.metadata-extractor/${cms.version}
     
 [:repoinit]
 
@@ -80,16 +88,22 @@
     end
 
     # Service users
-    create service user sling-rewriter
-    set ACL for sling-rewriter
-        allow   jcr:read    on /
-    end
     create service user sling-cms-error
     set ACL for sling-cms-error
     	allow	jcr:read	on /
     end
-    create service user sling-ugc
-    set ACL for sling-ugc
+    create service user sling-cms-metadata
+    set ACL for sling-cms-metadata
+        allow   jcr:write,jcr:nodeTypeManagement,jcr:versionManagement    on /content
+        allow   jcr:write,jcr:nodeTypeManagement,jcr:versionManagement    on /static
+        allow   jcr:read    on /
+    end
+    create service user sling-rewriter
+    set ACL for sling-rewriter
+        allow   jcr:read    on /
+    end
+    create service user sling-cms-ugc
+    set ACL for sling-cms-ugc
         allow   jcr:write    on /etc/usergenerated
     end
     create service user sling-cms-versionmgr
diff --git a/builder/src/main/provisioning/sling.txt b/builder/src/main/provisioning/sling.txt
index dad9dd2..087c498 100644
--- a/builder/src/main/provisioning/sling.txt
+++ b/builder/src/main/provisioning/sling.txt
@@ -95,7 +95,6 @@
     org.apache.pdfbox/fontbox/2.0.12
     org.apache.pdfbox/jempbox/1.8.16
     org.apache.tika/tika-core/1.19.1
-    org.apache.tika/tika-parsers/1.19.1
 
 [artifacts startLevel=15]
     org.apache.sling/org.apache.sling.jcr.jcr-wrapper/2.0.0
diff --git a/core/pom.xml b/core/pom.xml
index 58358e7..04a5232 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -34,6 +34,7 @@
                             org.apache.sling.cms.core.models,
                             org.apache.sling.cms.core.internal.models
                         </Sling-Model-Packages>
+                        <Embed-Dependency>*;scope=compile|runtime</Embed-Dependency>
                     </instructions>
                 </configuration>
             </plugin>
@@ -197,6 +198,7 @@
             <groupId>org.apache.sling</groupId>
             <artifactId>org.apache.sling.cms.api</artifactId>
             <version>${project.version}</version>
+            <scope>provided</scope>
         </dependency>
         <dependency>
             <groupId>org.apache.jackrabbit</groupId>
@@ -238,5 +240,20 @@
             <groupId>org.apache.sling</groupId>
             <artifactId>org.apache.sling.fileoptim</artifactId>
         </dependency>
+        <dependency>
+            <groupId>net.coobird</groupId>
+            <artifactId>thumbnailator</artifactId>
+            <version>[0.4, 0.5)</version>
+            <scope>compile</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.mockito</groupId>
+            <artifactId>mockito-core</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <artifactId>pdfbox</artifactId>
+            <groupId>org.apache.pdfbox</groupId>
+        </dependency>
     </dependencies>
 </project>
\ No newline at end of file
diff --git a/core/src/main/java/org/apache/sling/cms/core/internal/FileMetadataExtractorImpl.java b/core/src/main/java/org/apache/sling/cms/core/internal/FileMetadataExtractorImpl.java
new file mode 100644
index 0000000..fd0623f
--- /dev/null
+++ b/core/src/main/java/org/apache/sling/cms/core/internal/FileMetadataExtractorImpl.java
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sling.cms.core.internal;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Calendar;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.jackrabbit.JcrConstants;
+import org.apache.jackrabbit.util.Text;
+import org.apache.sling.api.resource.ModifiableValueMap;
+import org.apache.sling.api.resource.Resource;
+import org.apache.sling.cms.CMSConstants;
+import org.apache.sling.cms.File;
+import org.apache.sling.cms.FileMetadataExtractor;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.osgi.framework.Bundle;
+import org.osgi.framework.BundleContext;
+import org.osgi.framework.wiring.FrameworkWiring;
+import org.osgi.service.component.ComponentContext;
+import org.osgi.service.component.annotations.Activate;
+import org.osgi.service.component.annotations.Component;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.SAXException;
+
+@Component(service = FileMetadataExtractor.class)
+public class FileMetadataExtractorImpl implements FileMetadataExtractor {
+
+    private static final String METADATA_EXTRACTOR_BUNDLE_NAME = "org.apache.sling.cms.metadata-extractor";
+
+    private static final Logger log = LoggerFactory.getLogger(FileMetadataExtractorImpl.class);
+
+    private BundleContext bcx;
+
+    @Activate
+    public void activate(ComponentContext context) {
+        bcx = context.getBundleContext();
+    }
+
+    @Override
+    public Map<String, Object> extractMetadata(File file) throws IOException {
+        try {
+            return extractMetadata(file.getResource());
+        } catch (NoClassDefFoundError ncdfe) {
+
+            log.info("Caught exception: {}, Attempting to reload metadata extractor bundle", String.valueOf(ncdfe));
+            Bundle metadataExtractorBundle = Arrays.stream(bcx.getBundles())
+                    .filter(b -> METADATA_EXTRACTOR_BUNDLE_NAME.equals(b.getSymbolicName())).findFirst().orElse(null);
+
+            if (metadataExtractorBundle != null) {
+                try {
+                    log.debug("Reloading metadata bundle: {}", metadataExtractorBundle);
+                    Bundle systemBundle = bcx.getBundle(0);
+                    metadataExtractorBundle.stop();
+                    metadataExtractorBundle.start();
+
+                    FrameworkWiring frameworkWiring = systemBundle.adapt(FrameworkWiring.class);
+                    frameworkWiring.refreshBundles(Collections.singleton(metadataExtractorBundle));
+                    log.debug("Bundle reloaded successfully!");
+                } catch (Exception e) {
+                    log.warn("Failed to refresh metadata exporter packages", e);
+                }
+            }
+            try {
+                return extractMetadata(file.getResource());
+            } catch (SAXException | TikaException | NoClassDefFoundError e) {
+                throw new IOException("Failed to parse metadata after reloading metadata extractor bundle", e);
+            }
+        } catch (SAXException | TikaException e) {
+            throw new IOException("Failed to parse metadata", e);
+        }
+    }
+
+    @Override
+    public void updateMetadata(File file) throws IOException {
+        updateMetadata(file, true);
+    }
+
+    @Override
+    public void updateMetadata(File file, boolean persist) throws IOException {
+        log.trace("Updating metadata for {}, persist {}", file, persist);
+        try {
+            Resource resource = file.getResource();
+            Resource content = resource.getChild(JcrConstants.JCR_CONTENT);
+            if (content == null) {
+                log.warn("Content resource is null");
+                return;
+            }
+
+            Map<String, Object> properties = null;
+            Resource metadata = content.getChild(CMSConstants.NN_METADATA);
+            if (metadata != null) {
+                properties = metadata.adaptTo(ModifiableValueMap.class);
+            } else {
+                properties = new HashMap<>();
+                properties.put(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_UNSTRUCTURED);
+            }
+            if (properties != null) {
+                properties.putAll(extractMetadata(file.getResource()));
+                if (metadata == null) {
+                    resource.getResourceResolver().create(content, CMSConstants.NN_METADATA, properties);
+                }
+                if (persist) {
+                    resource.getResourceResolver().commit();
+                }
+                log.info("Metadata extracted from {}", resource.getPath());
+            } else {
+                throw new IOException("Unable to update metadata for " + resource.getPath());
+            }
+        } catch (SAXException | TikaException e) {
+            throw new IOException("Failed to parse metadata", e);
+        }
+
+    }
+
+    /** Parses the resource's binary with Tika and maps each metadata name to a JCR-safe property. */
+    public Map<String, Object> extractMetadata(Resource resource) throws IOException, SAXException, TikaException {
+        log.info("Extracting metadata from {}", resource.getPath());
+        Parser parser = new AutoDetectParser();
+        Metadata md = new Metadata();
+        // Tika consumes the stream but leaves closing to the caller; close it here so it is not leaked
+        try (InputStream is = resource.adaptTo(InputStream.class)) {
+            parser.parse(is, new BodyContentHandler(), md, new ParseContext());
+        }
+        Map<String, Object> properties = new HashMap<>();
+        for (String name : md.names()) {
+            putMetadata(properties, name, md);
+        }
+        return properties;
+    }
+
+    private void putMetadata(Map<String, Object> properties, String name, Metadata metadata) {
+        log.trace("Updating property: {}", name);
+        String filtered = Text.escapeIllegalJcrChars(name);
+        Property property = Property.get(name);
+        if (property != null) {
+            if (metadata.isMultiValued(property)) {
+                properties.put(filtered, metadata.getValues(property));
+            } else if (metadata.getDate(property) != null) {
+                Calendar cal = Calendar.getInstance();
+                cal.setTime(metadata.getDate(property));
+                properties.put(filtered, cal);
+            } else if (metadata.getInt(property) != null) {
+                properties.put(filtered, metadata.getInt(property));
+            } else {
+                properties.put(filtered, metadata.get(property));
+            }
+        } else {
+            properties.put(filtered, metadata.get(name));
+        }
+    }
+
+}
diff --git a/core/src/main/java/org/apache/sling/cms/core/internal/jobs/FileMetadataExtractorJob.java b/core/src/main/java/org/apache/sling/cms/core/internal/jobs/FileMetadataExtractorJob.java
index b09d0ce..5655c0d 100644
--- a/core/src/main/java/org/apache/sling/cms/core/internal/jobs/FileMetadataExtractorJob.java
+++ b/core/src/main/java/org/apache/sling/cms/core/internal/jobs/FileMetadataExtractorJob.java
@@ -16,6 +16,7 @@
  */
 package org.apache.sling.cms.core.internal.jobs;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -26,7 +27,7 @@ import org.apache.sling.api.resource.ResourceResolverFactory;
 import org.apache.sling.cms.CMSConstants;
 import org.apache.sling.cms.ConfigurableJobExecutor;
 import org.apache.sling.cms.File;
-import org.apache.sling.cms.core.internal.listeners.FileMetadataExtractor;
+import org.apache.sling.cms.FileMetadataExtractor;
 import org.apache.sling.event.jobs.Job;
 import org.apache.sling.event.jobs.consumer.JobConsumer;
 import org.apache.sling.event.jobs.consumer.JobExecutionContext;
@@ -89,11 +90,11 @@ public class FileMetadataExtractorJob extends ConfigurableJobExecutor {
                     extractor.extractMetadata(file);
                     context.incrementProgressCount(processed++);
                     context.log("Extracted metadata for {0}", file.getPath());
-                } catch (Throwable t) {
+                } catch (IOException e) {
                     context.log("Failed to extract matadata for {0}", file.getPath());
                     context.incrementProgressCount(processed++);
-                    context.log("Exception {0}", t.getMessage());
-                    log.warn("Failed to extract metadata for " + file.getPath(), t);
+                    context.log("Exception {0}", e.getMessage());
+                    log.warn("Failed to extract metadata for " + file.getPath(), e);
                 }
             }
 
diff --git a/core/src/main/java/org/apache/sling/cms/core/internal/listeners/FileMetadataExtractor.java b/core/src/main/java/org/apache/sling/cms/core/internal/listeners/FileMetadataExtractor.java
deleted file mode 100644
index d2b94a6..0000000
--- a/core/src/main/java/org/apache/sling/cms/core/internal/listeners/FileMetadataExtractor.java
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.sling.cms.core.internal.listeners;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Calendar;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.jackrabbit.JcrConstants;
-import org.apache.jackrabbit.util.Text;
-import org.apache.sling.api.resource.LoginException;
-import org.apache.sling.api.resource.ModifiableValueMap;
-import org.apache.sling.api.resource.Resource;
-import org.apache.sling.api.resource.ResourceResolver;
-import org.apache.sling.api.resource.ResourceResolverFactory;
-import org.apache.sling.api.resource.observation.ExternalResourceChangeListener;
-import org.apache.sling.api.resource.observation.ResourceChange;
-import org.apache.sling.api.resource.observation.ResourceChangeListener;
-import org.apache.sling.cms.File;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.Property;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.BodyContentHandler;
-import org.osgi.service.component.annotations.Component;
-import org.osgi.service.component.annotations.Reference;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.xml.sax.SAXException;
-
-/**
- * A Resource Change Listener which extracts the metadata from sling:Files when
- * they are uploaded.
- */
-@Component(service = { FileMetadataExtractor.class, ResourceChangeListener.class,
-        ExternalResourceChangeListener.class }, property = { ResourceChangeListener.CHANGES + "=ADDED",
-                ResourceChangeListener.PATHS + "=/content",
-                ResourceChangeListener.PATHS + "=/static" }, immediate = true)
-public class FileMetadataExtractor implements ResourceChangeListener, ExternalResourceChangeListener {
-
-    public static final String NN_METADATA = "metadata";
-    public static final String PN_X_PARSED_BY = "X-Parsed-By";
-
-    @Reference
-    private ResourceResolverFactory factory;
-
-    private static final Logger log = LoggerFactory.getLogger(FileMetadataExtractor.class);
-
-    public void extractMetadata(File file) throws IOException, SAXException, TikaException {
-        extractMetadata(file.getResource());
-    }
-
-    public void extractMetadata(Resource resource) throws IOException, SAXException, TikaException {
-
-        log.info("Extracting metadata from {}", resource.getPath());
-        ResourceResolver resolver = resource.getResourceResolver();
-        InputStream is = resource.adaptTo(InputStream.class);
-        Resource content = resource.getChild(JcrConstants.JCR_CONTENT);
-        if (content == null) {
-            log.warn("Content resource is null");
-            return;
-        }
-        Map<String, Object> properties = new HashMap<>();
-        Resource metadata = content.getChild(NN_METADATA);
-        if (metadata != null) {
-            properties = metadata.adaptTo(ModifiableValueMap.class);
-        } else {
-            properties.put(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_UNSTRUCTURED);
-        }
-        if (properties != null) {
-            Parser parser = new AutoDetectParser();
-            BodyContentHandler handler = new BodyContentHandler();
-            Metadata md = new Metadata();
-            ParseContext context = new ParseContext();
-            parser.parse(is, handler, md, context);
-            for (String name : md.names()) {
-                updateProperty(properties, name, md);
-            }
-            if (metadata == null) {
-                resolver.create(content, NN_METADATA, properties);
-            }
-            resolver.commit();
-            log.info("Metadata extracted from {}", resource.getPath());
-        } else {
-            log.warn("Failed to update metadata for {}", resource.getPath());
-        }
-
-    }
-
-    private void updateProperty(Map<String, Object> properties, String name, Metadata metadata) {
-        log.trace("Updating property: {}", name);
-        String filtered = Text.escapeIllegalJcrChars(name);
-        Property property = Property.get(name);
-        if (property != null) {
-            if (metadata.isMultiValued(property)) {
-                properties.put(filtered, metadata.getValues(property));
-            } else if (metadata.getDate(property) != null) {
-                Calendar cal = Calendar.getInstance();
-                cal.setTime(metadata.getDate(property));
-                properties.put(filtered, cal);
-            } else if (metadata.getInt(property) != null) {
-                properties.put(filtered, metadata.getInt(property));
-            } else {
-                properties.put(filtered, metadata.get(property));
-            }
-        } else {
-            properties.put(filtered, metadata.get(name));
-        }
-    }
-
-    @Override
-    public void onChange(List<ResourceChange> changes) {
-        Map<String, Object> serviceParams = new HashMap<>();
-        serviceParams.put(ResourceResolverFactory.SUBSERVICE, "sling-cms-metadata");
-        ResourceResolver serviceResolver = null;
-        try {
-            serviceResolver = factory.getServiceResourceResolver(serviceParams);
-            for (ResourceChange rc : changes) {
-                Resource changed = serviceResolver.getResource(rc.getPath());
-                try {
-                    extractMetadata(changed);
-                } catch (Throwable t) {
-                    log.warn("Failed to extract metadata due to exception", t);
-                }
-            }
-        } catch (LoginException e) {
-            log.error("Exception getting service user", e);
-        } finally {
-            if (serviceResolver != null) {
-                serviceResolver.close();
-            }
-        }
-
-    }
-
-}
diff --git a/core/src/main/java/org/apache/sling/cms/core/internal/listeners/FileMetadataExtractorListener.java b/core/src/main/java/org/apache/sling/cms/core/internal/listeners/FileMetadataExtractorListener.java
new file mode 100644
index 0000000..008c81f
--- /dev/null
+++ b/core/src/main/java/org/apache/sling/cms/core/internal/listeners/FileMetadataExtractorListener.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sling.cms.core.internal.listeners;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+import org.apache.sling.api.resource.LoginException;
+import org.apache.sling.api.resource.ResourceResolver;
+import org.apache.sling.api.resource.ResourceResolverFactory;
+import org.apache.sling.api.resource.observation.ExternalResourceChangeListener;
+import org.apache.sling.api.resource.observation.ResourceChange;
+import org.apache.sling.api.resource.observation.ResourceChangeListener;
+import org.apache.sling.cms.File;
+import org.apache.sling.cms.FileMetadataExtractor;
+import org.osgi.service.component.annotations.Component;
+import org.osgi.service.component.annotations.Reference;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A Resource Change Listener which extracts the metadata from sling:Files when
+ * they are uploaded.
+ */
+@Component(service = { FileMetadataExtractorListener.class, ResourceChangeListener.class,
+        ExternalResourceChangeListener.class }, property = { ResourceChangeListener.CHANGES + "=ADDED",
+                ResourceChangeListener.PATHS + "=/content",
+                ResourceChangeListener.PATHS + "=/static" }, immediate = true)
+public class FileMetadataExtractorListener implements ResourceChangeListener, ExternalResourceChangeListener {
+
+    @Reference
+    private FileMetadataExtractor extractor;
+
+    @Reference
+    private ResourceResolverFactory factory;
+
+    private static final Logger log = LoggerFactory.getLogger(FileMetadataExtractorListener.class);
+
+    @Override
+    public void onChange(List<ResourceChange> changes) {
+        Map<String, Object> serviceParams = new HashMap<>();
+        serviceParams.put(ResourceResolverFactory.SUBSERVICE, "sling-cms-metadata");
+        ResourceResolver serviceResolver = null;
+        try {
+            serviceResolver = factory.getServiceResourceResolver(serviceParams);
+            for (ResourceChange rc : changes) {
+                handleChange(serviceResolver, rc);
+            }
+        } catch (LoginException e) {
+            log.error("Exception getting service user", e);
+        } finally {
+            if (serviceResolver != null) {
+                serviceResolver.close();
+            }
+        }
+
+    }
+
+    private void handleChange(ResourceResolver serviceResolver, ResourceChange rc) {
+        File changed = Optional.ofNullable(serviceResolver.getResource(rc.getPath())).map(r -> r.adaptTo(File.class))
+                .orElse(null);
+        try {
+            if (changed != null) {
+                extractor.updateMetadata(changed);
+            } else {
+                log.warn("Failed to get File from: {}", rc.getPath());
+            }
+        } catch (IOException t) {
+            log.warn("Failed to extract metadata due to exception", t);
+        }
+    }
+
+}
diff --git a/core/src/main/java/org/apache/sling/cms/core/internal/models/FileImpl.java b/core/src/main/java/org/apache/sling/cms/core/internal/models/FileImpl.java
index 91d6ea2..ded1c6b 100644
--- a/core/src/main/java/org/apache/sling/cms/core/internal/models/FileImpl.java
+++ b/core/src/main/java/org/apache/sling/cms/core/internal/models/FileImpl.java
@@ -28,10 +28,10 @@ import org.apache.jackrabbit.util.Text;
 import org.apache.sling.api.resource.Resource;
 import org.apache.sling.api.resource.ValueMap;
 import org.apache.sling.api.wrappers.ValueMapDecorator;
+import org.apache.sling.cms.CMSConstants;
 import org.apache.sling.cms.File;
 import org.apache.sling.cms.Site;
 import org.apache.sling.cms.SiteManager;
-import org.apache.sling.cms.core.internal.listeners.FileMetadataExtractor;
 import org.apache.sling.models.annotations.Default;
 import org.apache.sling.models.annotations.Model;
 import org.apache.sling.models.annotations.Optional;
@@ -118,7 +118,7 @@ public class FileImpl implements File {
 
     @Override
     public ValueMap getMetadata() {
-        Resource metadata = this.getContentResource().getChild(FileMetadataExtractor.NN_METADATA);
+        Resource metadata = this.getContentResource().getChild(CMSConstants.NN_METADATA);
         Map<String, Object> data = new TreeMap<>();
         if (metadata != null) {
             metadata.getValueMap().entrySet()
diff --git a/core/src/main/java/org/apache/sling/cms/core/internal/servlets/TransformServlet.java b/core/src/main/java/org/apache/sling/cms/core/internal/servlets/TransformServlet.java
new file mode 100644
index 0000000..df79c9e
--- /dev/null
+++ b/core/src/main/java/org/apache/sling/cms/core/internal/servlets/TransformServlet.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sling.cms.core.internal.servlets;
+
+import java.awt.image.BufferedImage;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Optional;
+
+import javax.imageio.ImageIO;
+import javax.servlet.Servlet;
+import javax.servlet.ServletException;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.rendering.ImageType;
+import org.apache.pdfbox.rendering.PDFRenderer;
+import org.apache.sling.api.SlingHttpServletRequest;
+import org.apache.sling.api.SlingHttpServletResponse;
+import org.apache.sling.api.resource.Resource;
+import org.apache.sling.api.servlets.SlingSafeMethodsServlet;
+import org.apache.sling.cms.File;
+import org.osgi.service.component.annotations.Component;
+
+import com.google.common.net.MediaType;
+
+import net.coobird.thumbnailator.Thumbnails;
+import net.coobird.thumbnailator.Thumbnails.Builder;
+import net.coobird.thumbnailator.geometry.Positions;
+
+/**
+ * Servlet bound to the <code>transform</code> extension of
+ * <code>sling:File</code> and <code>nt:file</code> resources which writes a PNG
+ * rendition of the file to the response.
+ * <p>
+ * The request suffix is split on <code>/</code> into commands. The only
+ * supported command is <code>resize-&lt;width&gt;-&lt;height&gt;</code>, which
+ * produces a center-cropped image of exactly that size. When the command is
+ * given more than once the last one wins; when it is absent the image is
+ * rendered at its original size.
+ */
+@Component(service = { Servlet.class }, property = { "sling.servlet.extensions=transform",
+        "sling.servlet.resourceTypes=sling:File", "sling.servlet.resourceTypes=nt:file" })
+public class TransformServlet extends SlingSafeMethodsServlet {
+
+    private static final long serialVersionUID = -1513067546618762171L;
+
+    /** Prefix of the suffix command selecting the output dimensions. */
+    private static final String RESIZE_PREFIX = "resize-";
+
+    /**
+     * Renders the requested resource as a PNG image.
+     *
+     * @param request  the current request; its suffix carries the commands
+     * @param response the response the image (or an error status) is written to
+     * @throws ServletException declared by the servlet contract
+     * @throws IOException      if the source cannot be read or the image cannot
+     *                          be written
+     */
+    @Override
+    protected void doGet(SlingHttpServletRequest request, SlingHttpServletResponse response)
+            throws ServletException, IOException {
+
+        // The suffix is null when the URL has none; treat that as "no commands".
+        String suffix = Optional.ofNullable(request.getRequestPathInfo().getSuffix()).orElse("");
+
+        // Collect the dimensions first: the builder accepts its size only once.
+        Optional<int[]> size = Optional.empty();
+        for (String cmd : suffix.split("/")) {
+            if (cmd.startsWith(RESIZE_PREFIX)) {
+                size = parseResize(cmd);
+                if (!size.isPresent()) {
+                    response.sendError(SlingHttpServletResponse.SC_BAD_REQUEST,
+                            "Invalid resize command, expected resize-<width>-<height>: " + cmd);
+                    return;
+                }
+            }
+        }
+
+        // try-with-resources skips a null resource, so the null check inside is safe.
+        try (InputStream source = getInputStream(request.getResource())) {
+            if (source == null) {
+                response.sendError(SlingHttpServletResponse.SC_UNSUPPORTED_MEDIA_TYPE,
+                        "Resource cannot be transformed into an image");
+                return;
+            }
+            Builder<? extends InputStream> builder = Thumbnails.of(source);
+            if (size.isPresent()) {
+                builder.size(size.get()[0], size.get()[1]);
+                builder.crop(Positions.CENTER);
+            } else {
+                // The builder needs either a size or a scale before it can render.
+                builder.scale(1.0);
+            }
+            // Force the encoding so the bytes match the declared content type.
+            builder.outputFormat("png");
+            response.setContentType("image/png");
+            builder.toOutputStream(response.getOutputStream());
+        }
+    }
+
+    /**
+     * Parses a <code>resize-&lt;width&gt;-&lt;height&gt;</code> command.
+     *
+     * @param cmd the command, starting with {@link #RESIZE_PREFIX}
+     * @return the width and height (in that order), or empty when either value
+     *         is missing, not a number or not positive
+     */
+    private static Optional<int[]> parseResize(String cmd) {
+        String[] parts = cmd.split("\\-");
+        if (parts.length < 3) {
+            return Optional.empty();
+        }
+        try {
+            int width = Integer.parseInt(parts[1], 10);
+            int height = Integer.parseInt(parts[2], 10);
+            if (width > 0 && height > 0) {
+                return Optional.of(new int[] { width, height });
+            }
+        } catch (NumberFormatException ignored) {
+            // Not a number: reported to the client as a malformed command.
+        }
+        return Optional.empty();
+    }
+
+    /**
+     * Provides image data for the resource: the file content itself for image
+     * content types, or a JPEG rendering of the first page for PDF documents.
+     *
+     * @param resource the resource to read
+     * @return a stream of image data the caller must close, or
+     *         <code>null</code> when the content type is not supported or the
+     *         resource cannot be adapted to a stream
+     * @throws IOException if the PDF cannot be loaded or rendered
+     */
+    private InputStream getInputStream(Resource resource) throws IOException {
+        String contentType = Optional.ofNullable(resource.adaptTo(File.class)).map(File::getContentType).orElse("");
+        if (contentType.startsWith("image")) {
+            return resource.adaptTo(InputStream.class);
+        }
+        if (MediaType.PDF.toString().equals(contentType)) {
+            InputStream raw = resource.adaptTo(InputStream.class);
+            if (raw == null) {
+                return null;
+            }
+            // Close both the source stream and the document even when rendering fails.
+            try (InputStream pdf = raw; PDDocument document = PDDocument.load(pdf)) {
+                PDFRenderer pdfRenderer = new PDFRenderer(document);
+                BufferedImage bim = pdfRenderer.renderImageWithDPI(0, 300, ImageType.RGB);
+                ByteArrayOutputStream os = new ByteArrayOutputStream();
+                ImageIO.write(bim, "jpeg", os);
+                return new ByteArrayInputStream(os.toByteArray());
+            }
+        }
+        return null;
+    }
+}
diff --git a/core/src/test/java/org/apache/sling/cms/core/internal/FileMetadataExtractorImplTest.java b/core/src/test/java/org/apache/sling/cms/core/internal/FileMetadataExtractorImplTest.java
new file mode 100644
index 0000000..9ddf32f
--- /dev/null
+++ b/core/src/test/java/org/apache/sling/cms/core/internal/FileMetadataExtractorImplTest.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sling.cms.core.internal;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Map;
+
+import org.apache.sling.api.resource.Resource;
+import org.apache.sling.cms.File;
+import org.apache.sling.cms.FileMetadataExtractor;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mockito;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Checks that {@link FileMetadataExtractorImpl} returns metadata for the
+ * <code>apache.png</code> image loaded from the test classpath.
+ */
+public class FileMetadataExtractorImplTest {
+
+    private static final Logger log = LoggerFactory.getLogger(FileMetadataExtractorImplTest.class);
+
+    private File file;
+
+    /**
+     * Creates a mocked {@link File} whose resource adapts to the stream of the
+     * test image.
+     */
+    @Before
+    public void init() {
+        final InputStream image = FileMetadataExtractorImplTest.class.getClassLoader()
+                .getResourceAsStream("apache.png");
+
+        final Resource pngResource = Mockito.mock(Resource.class);
+        Mockito.when(pngResource.adaptTo(InputStream.class)).thenReturn(image);
+
+        file = Mockito.mock(File.class);
+        Mockito.when(file.getResource()).thenReturn(pngResource);
+    }
+
+    /**
+     * Extracting metadata from the mocked file must yield a non-empty map.
+     */
+    @Test
+    public void testExtractMetadata() throws IOException {
+        final FileMetadataExtractor underTest = new FileMetadataExtractorImpl();
+
+        final Map<String, Object> extracted = underTest.extractMetadata(file);
+
+        assertNotNull(extracted);
+        assertTrue(!extracted.isEmpty());
+
+        log.info("Extracted metadata: {}", extracted);
+    }
+}
diff --git a/core/src/test/resources/apache.png b/core/src/test/resources/apache.png
new file mode 100644
index 0000000..fc3f667
Binary files /dev/null and b/core/src/test/resources/apache.png differ
diff --git a/metadata-extractor/pom.xml b/metadata-extractor/pom.xml
new file mode 100644
index 0000000..59327d5
--- /dev/null
+++ b/metadata-extractor/pom.xml
@@ -0,0 +1,102 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional 
+    information regarding copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file except 
+    in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to 
+    in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See 
+    the License for the specific language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"
+>
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <artifactId>org.apache.sling.cms</artifactId>
+        <groupId>org.apache.sling</groupId>
+        <version>0.11.3-SNAPSHOT</version>
+    </parent>
+    <artifactId>org.apache.sling.cms.metadata-extractor</artifactId>
+    <packaging>bundle</packaging>
+    <name>Apache Sling - Metadata Extractor</name>
+    <description>OSGi Bundle Wrapper for https://github.com/drewnoakes/metadata-extractor</description>
+
+    <properties>
+        <sling.java.version>8</sling.java.version>
+    </properties>
+
+    <build>
+        <plugins>
+            <!-- Packages the libraries listed in this POM's dependencies section as an OSGi bundle -->
+            <plugin>
+                <groupId>org.apache.felix</groupId>
+                <artifactId>maven-bundle-plugin</artifactId>
+                <extensions>true</extensions>
+                <configuration>
+                    <instructions>
+                        <!-- Embed every compile and runtime scoped dependency inside the bundle -->
+                        <Embed-Dependency>*;scope=compile|runtime</Embed-Dependency>
+                        <!-- Make the com.drew packages available to other bundles -->
+                        <Export-Package>com.drew.*</Export-Package>
+                        <!-- Exclude the com.adobe.xmp packages from the imports. NOTE(review): presumably because xmpcore is embedded above; confirm -->
+                        <Import-Package>!com.adobe.xmp,!com.adobe.xmp.impl,!com.adobe.xmp.options,!com.adobe.xmp.properties,*</Import-Package>
+                    </instructions>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <configuration>
+                    <source>8</source>
+                    <target>8</target>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.sling</groupId>
+                <artifactId>maven-sling-plugin</artifactId>
+                <configuration>
+                    <slingUrl>${sling.protocol}://${sling.host}:${sling.port}/system/console</slingUrl>
+                    <user>${sling.username}</user>
+                    <password>${sling.password}</password>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-surefire-plugin</artifactId>
+                <configuration>
+                    <useSystemClassLoader>false</useSystemClassLoader>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+    <profiles>
+        <profile>
+            <id>autoInstallBundle</id>
+            <build>
+                <plugins>
+                    <plugin>
+                        <groupId>org.apache.sling</groupId>
+                        <artifactId>maven-sling-plugin</artifactId>
+                        <executions>
+                            <execution>
+                                <id>install-bundle</id>
+                                <goals>
+                                    <goal>install</goal>
+                                </goals>
+                            </execution>
+                        </executions>
+                    </plugin>
+                </plugins>
+            </build>
+        </profile>
+    </profiles>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.drewnoakes</groupId>
+            <artifactId>metadata-extractor</artifactId>
+            <version>2.11.0</version>
+            <scope>compile</scope>
+        </dependency>
+        <dependency>
+            <groupId>com.adobe.xmp</groupId>
+            <artifactId>xmpcore</artifactId>
+            <version>6.0.6</version>
+            <scope>compile</scope>
+        </dependency>
+    </dependencies>
+</project>
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 0939d7e..f186526 100644
--- a/pom.xml
+++ b/pom.xml
@@ -37,6 +37,7 @@
     <modules>
         <module>api</module>
         <module>core</module>
+        <module>metadata-extractor</module>
         <module>ui</module>
         <module>reference</module>
         <module>builder</module>
@@ -225,6 +226,18 @@
                 <version>2.2.0</version>
                 <scope>provided</scope>
             </dependency>
+            <dependency>
+                <groupId>org.apache.pdfbox</groupId>
+                <artifactId>pdfbox</artifactId>
+                <version>2.0.12</version>
+                <scope>provided</scope>
+            </dependency>
+            <dependency>
+                <groupId>org.mockito</groupId>
+                <artifactId>mockito-core</artifactId>
+                <version>2.27.0</version>
+                <scope>test</scope>
+            </dependency>
         </dependencies>
     </dependencyManagement>
 
diff --git a/ui/src/main/resources/jcr_root/libs/sling-cms/install/org.apache.sling.serviceusermapping.impl.ServiceUserMapperImpl.amended-sling-cms-metadata.json b/ui/src/main/resources/jcr_root/libs/sling-cms/install/org.apache.sling.serviceusermapping.impl.ServiceUserMapperImpl.amended-sling-cms-metadata.json
new file mode 100644
index 0000000..458a8de
--- /dev/null
+++ b/ui/src/main/resources/jcr_root/libs/sling-cms/install/org.apache.sling.serviceusermapping.impl.ServiceUserMapperImpl.amended-sling-cms-metadata.json
@@ -0,0 +1,6 @@
+{
+    "jcr:primaryType": "sling:OsgiConfig",
+    "user.mapping": [
+        "org.apache.sling.cms.core:sling-cms-metadata=sling-cms-metadata"
+    ]
+}
\ No newline at end of file