You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by bo...@apache.org on 2016/02/19 02:26:47 UTC
tika git commit: TIKA-1860 - Propose bundle artifact for multimedia
module
Repository: tika
Updated Branches:
refs/heads/bundle-classifier [created] c6d447600
TIKA-1860 - Propose bundle artifact for multimedia module
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/c6d44760
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/c6d44760
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/c6d44760
Branch: refs/heads/bundle-classifier
Commit: c6d4476003c9738617f323756b0211e310728c18
Parents: 6178e98
Author: Bob Paulin <bo...@apache.org>
Authored: Thu Feb 18 19:25:53 2016 -0600
Committer: Bob Paulin <bo...@apache.org>
Committed: Thu Feb 18 19:25:53 2016 -0600
----------------------------------------------------------------------
.gitignore | 3 +-
.../tika/osgi/TikaAbstractBundleActivator.java | 12 +-
tika-parser-modules/pom.xml | 60 +++++-
.../tika-parser-multimedia-module/pom.xml | 83 ++++++++
.../module/multimedia/internal/Activator.java | 36 ++++
.../java/org/apache/tika/module/BundleIT.java | 203 +++++++++++++++++++
6 files changed, 394 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/c6d44760/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index c262c68..7da9077 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,5 @@ target
*.iws
*.bin
nbactions.xml
-nb-configuration.xml
\ No newline at end of file
+nb-configuration.xml
+/bin/
http://git-wip-us.apache.org/repos/asf/tika/blob/c6d44760/tika-core/src/main/java/org/apache/tika/osgi/TikaAbstractBundleActivator.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/osgi/TikaAbstractBundleActivator.java b/tika-core/src/main/java/org/apache/tika/osgi/TikaAbstractBundleActivator.java
index a6250e2..b959147 100644
--- a/tika-core/src/main/java/org/apache/tika/osgi/TikaAbstractBundleActivator.java
+++ b/tika-core/src/main/java/org/apache/tika/osgi/TikaAbstractBundleActivator.java
@@ -20,6 +20,7 @@ import java.util.Dictionary;
import java.util.Enumeration;
import java.util.Locale;
import java.util.Properties;
+import java.util.ServiceLoader;
import org.apache.tika.parser.Parser;
import org.osgi.framework.BundleActivator;
@@ -37,8 +38,17 @@ public abstract class TikaAbstractBundleActivator implements BundleActivator {
return serviceProps;
}
+
+ public void registerTikaParserServiceLoader(BundleContext context, ClassLoader loader) // Registers every Parser that ServiceLoader can find through 'loader'.
+ {
+ ServiceLoader<Parser> serviceLoader = ServiceLoader.load(Parser.class, loader); // provider lookup is done against the supplied class loader
+ for(Parser currentParser: serviceLoader) // each provider found is handed to registerTikaService in turn
+ {
+ registerTikaService(context, currentParser, null); // null: no additional service properties are supplied for these parsers
+ }
+ }
- public void registerTikaService(BundleContext context, Parser parserService,
+ void registerTikaService(BundleContext context, Parser parserService,
Dictionary additionalServiceProperties) {
String parserFullyClassifiedName = parserService.getClass().getCanonicalName().toLowerCase(Locale.US);
http://git-wip-us.apache.org/repos/asf/tika/blob/c6d44760/tika-parser-modules/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/pom.xml b/tika-parser-modules/pom.xml
index 8e71c1b..61b42a0 100644
--- a/tika-parser-modules/pom.xml
+++ b/tika-parser-modules/pom.xml
@@ -59,9 +59,67 @@
</modules>
<dependencies>
+ <dependency>
+ <groupId>org.osgi</groupId>
+ <artifactId>org.osgi.core</artifactId>
+ <scope>provided</scope>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.osgi</groupId>
+ <artifactId>org.osgi.compendium</artifactId>
+ <scope>provided</scope>
+ <optional>true</optional>
+ </dependency>
<!-- Test dependencies -->
<dependency>
- <groupId>org.apache.tika</groupId>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-core</artifactId>
+ <version>1.7</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.ops4j.pax.exam</groupId>
+ <artifactId>pax-exam-junit4</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.ops4j.pax.exam</groupId>
+ <artifactId>pax-exam-container-native</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>org.apache.felix.framework</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.ops4j.pax.exam</groupId>
+ <artifactId>pax-exam-link-assembly</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.ops4j.pax.url</groupId>
+ <artifactId>pax-url-aether</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-simple</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>javax.inject</groupId>
+ <artifactId>javax.inject</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
<artifactId>tika-test-resources</artifactId>
<version>${project.version}</version>
<type>test-jar</type>
http://git-wip-us.apache.org/repos/asf/tika/blob/c6d44760/tika-parser-modules/tika-parser-multimedia-module/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/pom.xml b/tika-parser-modules/tika-parser-multimedia-module/pom.xml
index f15f3bd..13a7705 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/pom.xml
+++ b/tika-parser-modules/tika-parser-multimedia-module/pom.xml
@@ -22,6 +22,7 @@
<artifactId>tika-parser-multimedia-module</artifactId>
<name>Apache Tika parser multimedia module</name>
<url>http://tika.apache.org/</url>
+ <packaging>bundle</packaging>
<properties>
<metadata.extractor.version>2.8.0</metadata.extractor.version>
@@ -121,6 +122,88 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
</plugin>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-bundle-plugin</artifactId>
+ <extensions>true</extensions>
+ <configuration>
+ <classifier>bundle</classifier>
+ <instructions>
+ <Bundle-Activator>org.apache.tika.module.multimedia.internal.Activator</Bundle-Activator>
+ <_runsystempackages>com.sun.xml.bind.marshaller,
+ com.sun.xml.internal.bind.marshaller</_runsystempackages>
+ <Embed-Dependency>
+ metadata-extractor;inline=true,
+ xmpcore;inline=true,
+ commons-codec;inline=true,
+ commons-io;inline=true,
+ jempbox;inline=true,
+ fontbox;inline=true,
+ poi;inline=true,
+ isoparser;inline=true,
+ aspectjrt;inline=true
+ </Embed-Dependency>
+ <Embed-Transitive>true</Embed-Transitive>
+ <Export-Package>
+ org.apache.tika.parser.image.*,
+ org.apache.tika.parser.jpeg.*,
+ org.apache.tika.parser.audio.*,
+ org.apache.tika.parser.video.*,
+ org.apache.tika.parser.mp3.*,
+ org.apache.tika.parser.mp4.*
+ </Export-Package>
+ <Import-Package>
+ *,
+ com.adobe.xmp;resolution:=optional,
+ com.adobe.xmp.properties;resolution:=optional,
+ android.util;resolution:=optional
+ </Import-Package>
+ </instructions>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>jar</goal>
+ </goals>
+ <configuration>
+ <useDefaultManifestFile>true</useDefaultManifestFile>
+ <includes>
+ <include>org/apache/tika/**</include>
+ <include>META-INF/**</include>
+ </includes>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-failsafe-plugin</artifactId>
+ <executions>
+ <execution>
+ <goals>
+ <goal>integration-test</goal>
+ <goal>verify</goal>
+ </goals>
+ </execution>
+ </executions>
+ <configuration>
+ <systemPropertyVariables>
+ <org.ops4j.pax.logging.DefaultServiceLog.level>
+ WARN
+ </org.ops4j.pax.logging.DefaultServiceLog.level>
+ </systemPropertyVariables>
+ <systemProperties>
+ <property>
+ <name>project.bundle.file</name>
+ <value>target/${project.build.finalName}-bundle.jar</value>
+ </property>
+ </systemProperties>
+ </configuration>
+ </plugin>
</plugins>
</build>
http://git-wip-us.apache.org/repos/asf/tika/blob/c6d44760/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/module/multimedia/internal/Activator.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/module/multimedia/internal/Activator.java b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/module/multimedia/internal/Activator.java
new file mode 100644
index 0000000..7f53312
--- /dev/null
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/module/multimedia/internal/Activator.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.module.multimedia.internal;
+
+import org.apache.tika.osgi.TikaAbstractBundleActivator;
+import org.osgi.framework.BundleContext;
+
+public class Activator extends TikaAbstractBundleActivator { // Bundle activator for the multimedia parser module.
+ // The only work done here is to pass this class's own class loader to the inherited registration method.
+ @Override
+ public void start(BundleContext context) throws Exception {
+ // Register each Parser that ServiceLoader can discover through Activator's class loader.
+ registerTikaParserServiceLoader(context, Activator.class.getClassLoader());
+
+ }
+
+ @Override
+ public void stop(BundleContext context) throws Exception {
+ // No explicit cleanup is performed. NOTE(review): presumably the framework unregisters this bundle's services on stop - confirm.
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c6d44760/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/module/BundleIT.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/module/BundleIT.java b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/module/BundleIT.java
new file mode 100644
index 0000000..5817691
--- /dev/null
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/module/BundleIT.java
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.module;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertNotNull;
+import static org.ops4j.pax.exam.CoreOptions.bundle;
+import static org.ops4j.pax.exam.CoreOptions.junitBundles;
+import static org.ops4j.pax.exam.CoreOptions.options;
+import static org.ops4j.pax.exam.CoreOptions.mavenBundle;
+
+import javax.inject.Inject;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.net.URISyntaxException;
+import java.util.Dictionary;
+
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.osgi.TikaService;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.BodyContentHandler;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.ops4j.pax.exam.Configuration;
+import org.ops4j.pax.exam.Option;
+import org.ops4j.pax.exam.junit.PaxExam;
+import org.ops4j.pax.exam.spi.reactors.ExamReactorStrategy;
+import org.ops4j.pax.exam.spi.reactors.PerMethod;
+import org.osgi.framework.Bundle;
+import org.osgi.framework.BundleContext;
+import org.xml.sax.ContentHandler;
+
+@RunWith(PaxExam.class)
+@ExamReactorStrategy(PerMethod.class)
+public class BundleIT { // Pax Exam integration test: installs tika-core plus this module's bundle jar and exercises parsers via TikaService.
+ // Name of the system property that carries the path of the bundle jar under test (read in configuration()).
+ private static final String BUNDLE_JAR_SYS_PROP = "project.bundle.file";
+ @Inject
+ private BundleContext bc; // injected bundle context; every test reads bundles, services and resources through it
+ // Provisions the test container: JUnit bundles, tika-core by Maven coordinates, and the bundle jar named by the system property.
+ @Configuration
+ public Option[] configuration() throws IOException, URISyntaxException {
+ String bundleFileName = System.getProperty(BUNDLE_JAR_SYS_PROP); // NOTE(review): null when the property is unset; new File(null) below would then throw NullPointerException
+ return options(junitBundles(), mavenBundle("org.apache.tika", "tika-core"),
+ bundle(new File(bundleFileName).toURI().toString()));
+ }
+ // Checks that both the tika-core bundle and this module's bundle are present and in the ACTIVE state.
+ @Test
+ public void testBundleLoaded() throws Exception {
+ boolean hasCore = false, hasBundle = false;
+ for (Bundle b : bc.getBundles()) {
+ if ("org.apache.tika.core".equals(b.getSymbolicName())) {
+ hasCore = true;
+ assertEquals("Core not activated", Bundle.ACTIVE, b.getState());
+ }
+ if ("org.apache.tika.parser-multimedia-module".equals(b.getSymbolicName())) {
+ hasBundle = true;
+ assertEquals("Bundle not activated", Bundle.ACTIVE, b.getState());
+ }
+ }
+ assertTrue("Core bundle not found", hasCore);
+ assertTrue("Image bundle not found", hasBundle); // NOTE(review): message says "Image" but the name matched above is the multimedia module's
+ }
+ // Detects and parses testPNG.png through the TikaService, then checks the reported image width.
+ @Test
+ public void testImageParser() throws Exception {
+ TikaService tikaService = bc.getService(bc.getServiceReference(TikaService.class)); // look up the TikaService via the injected context
+ InputStream stream = bc.getBundle().getResource("/test-documents/testPNG.png").openStream();
+ // NOTE(review): this check runs after openStream(); if getResource(...) returned null the line above would already have thrown.
+ assertNotNull(stream);
+ // NOTE(review): neither stream nor tikaStream is closed anywhere in this method.
+ Metadata metadata = new Metadata();
+ TikaInputStream tikaStream = TikaInputStream.get(stream);
+ MediaType type = tikaService.detect(tikaStream, metadata);
+
+ assertEquals("Media Type should be PNG", MediaType.image("png"), type);
+
+ metadata.add(Metadata.CONTENT_TYPE, type.toString()); // record the detected type in the metadata handed to parse(...)
+ Writer writer = new StringWriter();
+ ContentHandler contentHandler = new BodyContentHandler(writer);
+ ParseContext context = new ParseContext();
+
+ tikaService.parse(tikaStream, contentHandler, metadata, context);
+ // Only the metadata is asserted; the text collected in writer is not inspected.
+ assertEquals("Image Output Width Should Match", "100", metadata.get(Metadata.IMAGE_WIDTH));
+ }
+ // Detects and parses testJPEG.jpg through the TikaService, then checks the reported image width.
+ @Test
+ public void testJpegParser() throws Exception {
+
+ TikaService tikaService = bc.getService(bc.getServiceReference(TikaService.class));
+ InputStream stream = bc.getBundle().getResource("/test-documents/testJPEG.jpg").openStream();
+ // NOTE(review): null check happens after openStream() was already invoked; streams are not closed in this method.
+ assertNotNull(stream);
+
+ Metadata metadata = new Metadata();
+ TikaInputStream tikaStream = TikaInputStream.get(stream);
+ MediaType type = tikaService.detect(tikaStream, metadata);
+
+ assertEquals("Media Type should be JPEG", MediaType.image("jpeg"), type);
+
+ metadata.add(Metadata.CONTENT_TYPE, type.toString());
+ Writer writer = new StringWriter();
+ ContentHandler contentHandler = new BodyContentHandler(writer);
+ ParseContext context = new ParseContext();
+
+ tikaService.parse(tikaStream, contentHandler, metadata, context);
+
+ assertEquals("Jpg Output Width Should Match", "100", metadata.get(Metadata.IMAGE_WIDTH));
+ }
+ @Test
+ public void testVideoParser() throws Exception { // Detects and parses testFLV.flv, then checks the "hasAudio" metadata entry.
+ TikaService tikaService = bc.getService(bc.getServiceReference(TikaService.class));
+ InputStream stream = bc.getBundle().getResource("/test-documents/testFLV.flv").openStream();
+ // NOTE(review): null check happens after openStream() was already invoked; streams are not closed in this method.
+ assertNotNull(stream);
+
+ Metadata metadata = new Metadata();
+ TikaInputStream tikaStream = TikaInputStream.get(stream);
+ MediaType type = tikaService.detect(tikaStream, metadata);
+
+ assertEquals("Media Type should be FLV", MediaType.video("x-flv"), type);
+
+ metadata.add(Metadata.CONTENT_TYPE, type.toString());
+ Writer writer = new StringWriter();
+ ContentHandler contentHandler = new BodyContentHandler(writer);
+ ParseContext context = new ParseContext();
+
+ tikaService.parse(tikaStream, contentHandler, metadata, context);
+
+ assertEquals("Video Should have audio", "true", metadata.get("hasAudio"));
+
+ }
+ // Detects and parses testMP3i18n.mp3, then checks the title stored under TikaCoreProperties.TITLE.
+ @Test
+ public void testMp3Parser() throws Exception {
+ TikaService tikaService = bc.getService(bc.getServiceReference(TikaService.class));
+ InputStream stream = bc.getBundle().getResource("/test-documents/testMP3i18n.mp3").openStream();
+ // NOTE(review): null check happens after openStream() was already invoked; streams are not closed in this method.
+ assertNotNull(stream);
+
+ Metadata metadata = new Metadata();
+ TikaInputStream tikaStream = TikaInputStream.get(stream);
+ MediaType type = tikaService.detect(tikaStream, metadata);
+
+ assertEquals("Media Type should be MP3", MediaType.audio("mpeg"), type);
+
+ metadata.add(Metadata.CONTENT_TYPE, type.toString());
+ Writer writer = new StringWriter();
+ ContentHandler contentHandler = new BodyContentHandler(writer);
+ ParseContext context = new ParseContext();
+
+ tikaService.parse(tikaStream, contentHandler, metadata, context);
+ // The expected title contains a non-ASCII character, written as a Unicode escape in the literal below.
+ assertEquals("MP3 should have title", "Une chason en Fran\u00e7ais", metadata.get(TikaCoreProperties.TITLE));
+
+ }
+ // Detects and parses testMID.mid, then checks the "tracks" metadata entry.
+ @Test
+ public void testMidiParser() throws Exception {
+ TikaService tikaService = bc.getService(bc.getServiceReference(TikaService.class));
+ InputStream stream = bc.getBundle().getResource("/test-documents/testMID.mid").openStream();
+ // NOTE(review): null check happens after openStream() was already invoked; streams are not closed in this method.
+ assertNotNull(stream);
+
+ Metadata metadata = new Metadata();
+ TikaInputStream tikaStream = TikaInputStream.get(stream);
+ MediaType type = tikaService.detect(tikaStream, metadata);
+
+ assertEquals("Media Type should be Midi", MediaType.audio("midi"), type);
+
+ metadata.add(Metadata.CONTENT_TYPE, type.toString());
+ Writer writer = new StringWriter();
+ ContentHandler contentHandler = new BodyContentHandler(writer);
+ ParseContext context = new ParseContext();
+
+ tikaService.parse(tikaStream, contentHandler, metadata, context);
+ assertEquals("Midi should have 2 tracks", "2", metadata.get("tracks"));
+ }
+
+}