You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/05/24 16:14:22 UTC

svn commit: r1342279 - in /incubator/stanbol/trunk: enhancer/bundlelist/src/main/bundles/ enhancer/engines/tika/ enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/ enhancer/engines/tika/src/test/java/org/apache/stanbol/enhanc...

Author: rwesten
Date: Thu May 24 14:14:21 2012
New Revision: 1342279

URL: http://svn.apache.org/viewvc?rev=1342279&view=rev
Log:
STANBOL-627: Updates the Tika version used by Stanbol to 1.1

Added:
    incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP4.m4a   (with props)
Modified:
    incubator/stanbol/trunk/enhancer/bundlelist/src/main/bundles/list.xml
    incubator/stanbol/trunk/enhancer/engines/tika/pom.xml
    incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java
    incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java
    incubator/stanbol/trunk/parent/pom.xml

Modified: incubator/stanbol/trunk/enhancer/bundlelist/src/main/bundles/list.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/bundlelist/src/main/bundles/list.xml?rev=1342279&r1=1342278&r2=1342279&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/bundlelist/src/main/bundles/list.xml (original)
+++ incubator/stanbol/trunk/enhancer/bundlelist/src/main/bundles/list.xml Thu May 24 14:14:21 2012
@@ -29,14 +29,14 @@
     <bundle> <!-- Apache Tika core (required by the LangId and TikaEngine) -->
         <groupId>org.apache.tika</groupId>
         <artifactId>tika-core</artifactId>
-        <version>1.0</version>
+        <version>1.1</version>
     </bundle>
   </startLevel>
   <startLevel level="17">
     <bundle> <!-- Apache Tika bundle (required by the TikaEngine) -->
         <groupId>org.apache.tika</groupId>
         <artifactId>tika-bundle</artifactId>
-        <version>1.0</version>
+        <version>1.1</version>
     </bundle>
   </startLevel>
   <!-- Stanbol Enhancer infrastructure and required libraries -->

Modified: incubator/stanbol/trunk/enhancer/engines/tika/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/pom.xml?rev=1342279&r1=1342278&r2=1342279&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/engines/tika/pom.xml Thu May 24 14:14:21 2012
@@ -115,7 +115,6 @@
       <groupId>org.apache.tika</groupId>
       <artifactId>tika-parsers</artifactId>
     </dependency>
-
     <dependency>
       <groupId>org.apache.felix</groupId>
       <artifactId>org.apache.felix.scr.annotations</artifactId>

Modified: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java?rev=1342279&r1=1342278&r2=1342279&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java Thu May 24 14:14:21 2012
@@ -37,6 +37,7 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.Dictionary;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.commons.io.IOUtils;
@@ -171,7 +172,8 @@ public class TikaEngine 
         }
         ParseContext context = new ParseContext();
         context.set(Parser.class,parser);
-        if(parser.getSupportedTypes(context).contains(plainMediaType)) {
+        Set<MediaType> supproted = parser.getSupportedTypes(context);
+        if(supproted.contains(plainMediaType)) {
             final InputStream in;
             if(mtas.in == null){
                 in = ci.getStream();
@@ -238,7 +240,7 @@ public class TikaEngine 
             //add the extracted metadata
             if(log.isDebugEnabled()){
                 for(String name : metadata.names()){
-                    log.debug("{}: {}",name,Arrays.toString(metadata.getValues(name)));
+                    log.info("{}: {}",name,Arrays.toString(metadata.getValues(name)));
                 }
             }
             ci.getLock().writeLock().lock();

Modified: incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java?rev=1342279&r1=1342278&r2=1342279&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java Thu May 24 14:14:21 2012
@@ -1,5 +1,5 @@
 /*
-* Licensed to the Apache Software Foundation (ASF) under one or more
+ * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
@@ -363,6 +363,46 @@ public class TikaEngineTest {
         verifyValue(ci, audioTrack, new UriRef(NamespaceEnum.media+"samplingRate"), XSD.int_, "44100");
         verifyValue(ci, audioTrack, new UriRef(NamespaceEnum.media+"hasCompression"), XSD.string, "MP3");
     }
+    /**
+     * Tests mappings for the Mp4 metadata extraction capabilities added to
+     * Tika 1.1 (STANBOL-627)
+     * @throws EngineException
+     * @throws IOException
+     * @throws ParseException
+     */
+    //@Test deactivated because of TIKA-852
+    public void testMp4() throws EngineException, IOException, ParseException {
+        log.info(">>> testMp4 <<<");
+        ContentItem ci = createContentItem("testMP4.m4a", "audio/mp4");
+        assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
+        engine.computeEnhancements(ci);
+        Entry<UriRef,Blob> contentPart = ContentItemHelper.getBlob(ci, 
+            singleton("text/plain"));
+        assertNotNull(contentPart);
+        Blob plainTextBlob = contentPart.getValue();
+        assertNotNull(plainTextBlob);
+        assertContentRegexp(plainTextBlob, 
+            "Test Title",
+            "Test Artist",
+            "Test Album");
+        //validate XHTML results
+        contentPart = ContentItemHelper.getBlob(ci, 
+            singleton("application/xhtml+xml"));
+        assertNotNull(contentPart);
+        Blob xhtmlBlob = contentPart.getValue();
+        assertNotNull(xhtmlBlob);
+        //Test AudioTrack metadata
+        NonLiteral audioTrack = verifyNonLiteral(ci, new UriRef(NamespaceEnum.media+"hasTrack"));
+        //types
+        verifyValues(ci, audioTrack, RDF.type, 
+            new UriRef(NamespaceEnum.media+"MediaFragment"),
+            new UriRef(NamespaceEnum.media+"Track"),
+            new UriRef(NamespaceEnum.media+"AudioTrack"));
+        //properties
+        verifyValue(ci, audioTrack, new UriRef(NamespaceEnum.media+"hasFormat"), XSD.string, "Stereo");
+        verifyValue(ci, audioTrack, new UriRef(NamespaceEnum.media+"samplingRate"), XSD.int_, "44100");
+        verifyValue(ci, audioTrack, new UriRef(NamespaceEnum.media+"hasCompression"), XSD.string, "M4A");
+    }
     @Test
     public void testGEOMetadata() throws EngineException, IOException, ParseException{
         log.info(">>> testGEOMetadata <<<");

Added: incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP4.m4a
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP4.m4a?rev=1342279&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP4.m4a
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: incubator/stanbol/trunk/parent/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/parent/pom.xml?rev=1342279&r1=1342278&r2=1342279&view=diff
==============================================================================
--- incubator/stanbol/trunk/parent/pom.xml (original)
+++ incubator/stanbol/trunk/parent/pom.xml Thu May 24 14:14:21 2012
@@ -1118,14 +1118,13 @@
     <dependency>
       <groupId>org.apache.tika</groupId>
       <artifactId>tika-core</artifactId>
-      <version>1.0</version>
+      <version>1.1</version>
     </dependency>
     <dependency>
       <groupId>org.apache.tika</groupId>
       <artifactId>tika-parsers</artifactId>
-      <version>1.0</version>
-    </dependency>
-    
+      <version>1.1</version>
+    </dependency>    
     <!-- Aperture -->
     <dependency>
       <groupId>org.semanticdesktop.aperture</groupId>