You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain text copy.
Posted to commits@tika.apache.org by bo...@apache.org on 2016/02/19 02:26:47 UTC

tika git commit: TIKA-1860 - Propose bundle artifact for multimedia module

Repository: tika
Updated Branches:
  refs/heads/bundle-classifier [created] c6d447600


TIKA-1860 - Propose bundle artifact for multimedia module

Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/c6d44760
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/c6d44760
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/c6d44760

Branch: refs/heads/bundle-classifier
Commit: c6d4476003c9738617f323756b0211e310728c18
Parents: 6178e98
Author: Bob Paulin <bo...@apache.org>
Authored: Thu Feb 18 19:25:53 2016 -0600
Committer: Bob Paulin <bo...@apache.org>
Committed: Thu Feb 18 19:25:53 2016 -0600

----------------------------------------------------------------------
 .gitignore                                      |   3 +-
 .../tika/osgi/TikaAbstractBundleActivator.java  |  12 +-
 tika-parser-modules/pom.xml                     |  60 +++++-
 .../tika-parser-multimedia-module/pom.xml       |  83 ++++++++
 .../module/multimedia/internal/Activator.java   |  36 ++++
 .../java/org/apache/tika/module/BundleIT.java   | 203 +++++++++++++++++++
 6 files changed, 394 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/c6d44760/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index c262c68..7da9077 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,5 @@ target
 *.iws
 *.bin
 nbactions.xml
-nb-configuration.xml
\ No newline at end of file
+nb-configuration.xml
+/bin/

http://git-wip-us.apache.org/repos/asf/tika/blob/c6d44760/tika-core/src/main/java/org/apache/tika/osgi/TikaAbstractBundleActivator.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/osgi/TikaAbstractBundleActivator.java b/tika-core/src/main/java/org/apache/tika/osgi/TikaAbstractBundleActivator.java
index a6250e2..b959147 100644
--- a/tika-core/src/main/java/org/apache/tika/osgi/TikaAbstractBundleActivator.java
+++ b/tika-core/src/main/java/org/apache/tika/osgi/TikaAbstractBundleActivator.java
@@ -20,6 +20,7 @@ import java.util.Dictionary;
 import java.util.Enumeration;
 import java.util.Locale;
 import java.util.Properties;
+import java.util.ServiceLoader;
 
 import org.apache.tika.parser.Parser;
 import org.osgi.framework.BundleActivator;
@@ -37,8 +38,17 @@ public abstract class TikaAbstractBundleActivator implements BundleActivator {
         return serviceProps;
 
     }
+    
+    public void registerTikaParserServiceLoader(BundleContext context, ClassLoader loader)
+    {
+        ServiceLoader<Parser> serviceLoader = ServiceLoader.load(Parser.class, loader);
+        for(Parser currentParser: serviceLoader)
+        {
+            registerTikaService(context, currentParser, null);
+        }
+    }
 
-    public void registerTikaService(BundleContext context, Parser parserService,
+    void registerTikaService(BundleContext context, Parser parserService,
             Dictionary additionalServiceProperties) {
         String parserFullyClassifiedName = parserService.getClass().getCanonicalName().toLowerCase(Locale.US);
 

http://git-wip-us.apache.org/repos/asf/tika/blob/c6d44760/tika-parser-modules/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/pom.xml b/tika-parser-modules/pom.xml
index 8e71c1b..61b42a0 100644
--- a/tika-parser-modules/pom.xml
+++ b/tika-parser-modules/pom.xml
@@ -59,9 +59,67 @@
   </modules>
 
   <dependencies>
+      <dependency>
+      <groupId>org.osgi</groupId>
+      <artifactId>org.osgi.core</artifactId>
+      <scope>provided</scope>
+      <optional>true</optional>
+    </dependency>
+    <dependency>
+      <groupId>org.osgi</groupId>
+      <artifactId>org.osgi.compendium</artifactId>
+      <scope>provided</scope>
+      <optional>true</optional>
+    </dependency>
     <!-- Test dependencies -->
     <dependency>
-      <groupId>org.apache.tika</groupId>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-core</artifactId>
+      <version>1.7</version>
+      <scope>test</scope>
+    </dependency>
+     <dependency>
+      <groupId>org.ops4j.pax.exam</groupId>
+      <artifactId>pax-exam-junit4</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.ops4j.pax.exam</groupId>
+      <artifactId>pax-exam-container-native</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.felix</groupId>
+      <artifactId>org.apache.felix.framework</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.ops4j.pax.exam</groupId>
+      <artifactId>pax-exam-link-assembly</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.ops4j.pax.url</groupId>
+      <artifactId>pax-url-aether</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-simple</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>javax.inject</groupId>
+      <artifactId>javax.inject</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
       <artifactId>tika-test-resources</artifactId>
       <version>${project.version}</version>
       <type>test-jar</type>

http://git-wip-us.apache.org/repos/asf/tika/blob/c6d44760/tika-parser-modules/tika-parser-multimedia-module/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/pom.xml b/tika-parser-modules/tika-parser-multimedia-module/pom.xml
index f15f3bd..13a7705 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/pom.xml
+++ b/tika-parser-modules/tika-parser-multimedia-module/pom.xml
@@ -22,6 +22,7 @@
   <artifactId>tika-parser-multimedia-module</artifactId>
   <name>Apache Tika parser multimedia module</name>
   <url>http://tika.apache.org/</url>
+  <packaging>bundle</packaging>
   
   <properties>
     <metadata.extractor.version>2.8.0</metadata.extractor.version>
@@ -121,6 +122,88 @@
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-dependency-plugin</artifactId>
       </plugin>
+            <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-bundle-plugin</artifactId>
+        <extensions>true</extensions>
+        <configuration>
+          <classifier>bundle</classifier>
+          <instructions>
+            <Bundle-Activator>org.apache.tika.module.multimedia.internal.Activator</Bundle-Activator>
+            <_runsystempackages>com.sun.xml.bind.marshaller,
+              com.sun.xml.internal.bind.marshaller</_runsystempackages>
+            <Embed-Dependency>
+              metadata-extractor;inline=true,
+              xmpcore;inline=true,
+              commons-codec;inline=true,
+              commons-io;inline=true,
+              jempbox;inline=true,
+              fontbox;inline=true,
+              poi;inline=true,
+              isoparser;inline=true,
+              aspectjrt;inline=true
+            </Embed-Dependency> 
+            <Embed-Transitive>true</Embed-Transitive>
+            <Export-Package>
+              org.apache.tika.parser.image.*,
+              org.apache.tika.parser.jpeg.*,
+              org.apache.tika.parser.audio.*,
+              org.apache.tika.parser.video.*,
+              org.apache.tika.parser.mp3.*,
+              org.apache.tika.parser.mp4.*
+            </Export-Package>
+            <Import-Package>
+              *,
+              com.adobe.xmp;resolution:=optional,
+              com.adobe.xmp.properties;resolution:=optional,
+              android.util;resolution:=optional
+            </Import-Package>
+          </instructions>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>jar</goal>
+            </goals>
+            <configuration>
+              <useDefaultManifestFile>true</useDefaultManifestFile>
+              <includes>
+                <include>org/apache/tika/**</include>
+                <include>META-INF/**</include>
+              </includes>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <artifactId>maven-failsafe-plugin</artifactId>
+        <executions>
+          <execution>
+            <goals>
+              <goal>integration-test</goal>
+              <goal>verify</goal>
+            </goals>
+          </execution>
+        </executions>
+        <configuration>
+          <systemPropertyVariables>
+            <org.ops4j.pax.logging.DefaultServiceLog.level>
+              WARN
+            </org.ops4j.pax.logging.DefaultServiceLog.level>
+          </systemPropertyVariables>
+          <systemProperties>
+            <property>
+              <name>project.bundle.file</name>
+              <value>target/${project.build.finalName}-bundle.jar</value>
+            </property>
+          </systemProperties>
+        </configuration>
+      </plugin>
     </plugins>
   </build>
 

http://git-wip-us.apache.org/repos/asf/tika/blob/c6d44760/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/module/multimedia/internal/Activator.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/module/multimedia/internal/Activator.java b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/module/multimedia/internal/Activator.java
new file mode 100644
index 0000000..7f53312
--- /dev/null
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/module/multimedia/internal/Activator.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.module.multimedia.internal;
+
+import org.apache.tika.osgi.TikaAbstractBundleActivator;
+import org.osgi.framework.BundleContext;
+
+public class Activator extends TikaAbstractBundleActivator {
+
+    @Override
+    public void start(BundleContext context) throws Exception {
+
+        registerTikaParserServiceLoader(context, Activator.class.getClassLoader());
+
+    }
+
+    @Override
+    public void stop(BundleContext context) throws Exception {
+
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c6d44760/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/module/BundleIT.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/module/BundleIT.java b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/module/BundleIT.java
new file mode 100644
index 0000000..5817691
--- /dev/null
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/module/BundleIT.java
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.module;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertNotNull;
+import static org.ops4j.pax.exam.CoreOptions.bundle;
+import static org.ops4j.pax.exam.CoreOptions.junitBundles;
+import static org.ops4j.pax.exam.CoreOptions.options;
+import static org.ops4j.pax.exam.CoreOptions.mavenBundle;
+
+import javax.inject.Inject;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.net.URISyntaxException;
+import java.util.Dictionary;
+
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.osgi.TikaService;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.BodyContentHandler;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.ops4j.pax.exam.Configuration;
+import org.ops4j.pax.exam.Option;
+import org.ops4j.pax.exam.junit.PaxExam;
+import org.ops4j.pax.exam.spi.reactors.ExamReactorStrategy;
+import org.ops4j.pax.exam.spi.reactors.PerMethod;
+import org.osgi.framework.Bundle;
+import org.osgi.framework.BundleContext;
+import org.xml.sax.ContentHandler;
+
+@RunWith(PaxExam.class)
+@ExamReactorStrategy(PerMethod.class)
+public class BundleIT {
+
+    private static final String BUNDLE_JAR_SYS_PROP = "project.bundle.file";
+    @Inject
+    private BundleContext bc;
+
+    @Configuration
+    public Option[] configuration() throws IOException, URISyntaxException {
+        String bundleFileName = System.getProperty(BUNDLE_JAR_SYS_PROP);
+        return options(junitBundles(), mavenBundle("org.apache.tika", "tika-core"),
+                bundle(new File(bundleFileName).toURI().toString()));
+    }
+
+    @Test
+    public void testBundleLoaded() throws Exception {
+        boolean hasCore = false, hasBundle = false;
+        for (Bundle b : bc.getBundles()) {
+            if ("org.apache.tika.core".equals(b.getSymbolicName())) {
+                hasCore = true;
+                assertEquals("Core not activated", Bundle.ACTIVE, b.getState());
+            }
+            if ("org.apache.tika.parser-multimedia-module".equals(b.getSymbolicName())) {
+                hasBundle = true;
+                assertEquals("Bundle not activated", Bundle.ACTIVE, b.getState());
+            }
+        }
+        assertTrue("Core bundle not found", hasCore);
+        assertTrue("Image bundle not found", hasBundle);
+    }
+
+    @Test
+    public void testImageParser() throws Exception {
+        TikaService tikaService = bc.getService(bc.getServiceReference(TikaService.class));
+        InputStream stream = bc.getBundle().getResource("/test-documents/testPNG.png").openStream();
+
+        assertNotNull(stream);
+
+        Metadata metadata = new Metadata();
+        TikaInputStream tikaStream = TikaInputStream.get(stream);
+        MediaType type = tikaService.detect(tikaStream, metadata);
+
+        assertEquals("Media Type should be PNG", MediaType.image("png"), type);
+
+        metadata.add(Metadata.CONTENT_TYPE, type.toString());
+        Writer writer = new StringWriter();
+        ContentHandler contentHandler = new BodyContentHandler(writer);
+        ParseContext context = new ParseContext();
+
+        tikaService.parse(tikaStream, contentHandler, metadata, context);
+
+        assertEquals("Image Output Width Should Match", "100", metadata.get(Metadata.IMAGE_WIDTH));
+    }
+
+    @Test
+    public void testJpegParser() throws Exception {
+
+        TikaService tikaService = bc.getService(bc.getServiceReference(TikaService.class));
+        InputStream stream = bc.getBundle().getResource("/test-documents/testJPEG.jpg").openStream();
+
+        assertNotNull(stream);
+
+        Metadata metadata = new Metadata();
+        TikaInputStream tikaStream = TikaInputStream.get(stream);
+        MediaType type = tikaService.detect(tikaStream, metadata);
+
+        assertEquals("Media Type should be JPEG", MediaType.image("jpeg"), type);
+
+        metadata.add(Metadata.CONTENT_TYPE, type.toString());
+        Writer writer = new StringWriter();
+        ContentHandler contentHandler = new BodyContentHandler(writer);
+        ParseContext context = new ParseContext();
+
+        tikaService.parse(tikaStream, contentHandler, metadata, context);
+
+        assertEquals("Jpg Output Width Should Match", "100", metadata.get(Metadata.IMAGE_WIDTH));
+    }
+    @Test
+    public void testVideoParser() throws Exception {
+        TikaService tikaService = bc.getService(bc.getServiceReference(TikaService.class));
+        InputStream stream = bc.getBundle().getResource("/test-documents/testFLV.flv").openStream();
+
+        assertNotNull(stream);
+
+        Metadata metadata = new Metadata();
+        TikaInputStream tikaStream = TikaInputStream.get(stream);
+        MediaType type = tikaService.detect(tikaStream, metadata);
+
+        assertEquals("Media Type should be FLV", MediaType.video("x-flv"), type);
+
+        metadata.add(Metadata.CONTENT_TYPE, type.toString());
+        Writer writer = new StringWriter();
+        ContentHandler contentHandler = new BodyContentHandler(writer);
+        ParseContext context = new ParseContext();
+
+        tikaService.parse(tikaStream, contentHandler, metadata, context);
+
+        assertEquals("Video Should have audio", "true", metadata.get("hasAudio"));
+
+    }
+
+    @Test
+    public void testMp3Parser() throws Exception {
+        TikaService tikaService = bc.getService(bc.getServiceReference(TikaService.class));
+        InputStream stream = bc.getBundle().getResource("/test-documents/testMP3i18n.mp3").openStream();
+
+        assertNotNull(stream);
+
+        Metadata metadata = new Metadata();
+        TikaInputStream tikaStream = TikaInputStream.get(stream);
+        MediaType type = tikaService.detect(tikaStream, metadata);
+
+        assertEquals("Media Type should be MP3", MediaType.audio("mpeg"), type);
+
+        metadata.add(Metadata.CONTENT_TYPE, type.toString());
+        Writer writer = new StringWriter();
+        ContentHandler contentHandler = new BodyContentHandler(writer);
+        ParseContext context = new ParseContext();
+
+        tikaService.parse(tikaStream, contentHandler, metadata, context);
+
+        assertEquals("MP3 should have title", "Une chason en Fran\u00e7ais", metadata.get(TikaCoreProperties.TITLE));
+
+    }
+    
+    @Test
+    public void testMidiParser() throws Exception {
+        TikaService tikaService = bc.getService(bc.getServiceReference(TikaService.class));
+        InputStream stream = bc.getBundle().getResource("/test-documents/testMID.mid").openStream();
+
+        assertNotNull(stream);
+
+        Metadata metadata = new Metadata();
+        TikaInputStream tikaStream = TikaInputStream.get(stream);
+        MediaType type = tikaService.detect(tikaStream, metadata);
+
+        assertEquals("Media Type should be Midi", MediaType.audio("midi"), type);
+
+        metadata.add(Metadata.CONTENT_TYPE, type.toString());
+        Writer writer = new StringWriter();
+        ContentHandler contentHandler = new BodyContentHandler(writer);
+        ParseContext context = new ParseContext();
+
+        tikaService.parse(tikaStream, contentHandler, metadata, context);
+        assertEquals("Midi should have 2 tracks", "2", metadata.get("tracks"));
+    }
+
+}