You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/05/24 16:14:22 UTC
svn commit: r1342279 - in /incubator/stanbol/trunk:
enhancer/bundlelist/src/main/bundles/ enhancer/engines/tika/
enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/
enhancer/engines/tika/src/test/java/org/apache/stanbol/enhanc...
Author: rwesten
Date: Thu May 24 14:14:21 2012
New Revision: 1342279
URL: http://svn.apache.org/viewvc?rev=1342279&view=rev
Log:
STANBOL-627: Updates the Tika version used by Stanbol to 1.1
Added:
incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP4.m4a (with props)
Modified:
incubator/stanbol/trunk/enhancer/bundlelist/src/main/bundles/list.xml
incubator/stanbol/trunk/enhancer/engines/tika/pom.xml
incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java
incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java
incubator/stanbol/trunk/parent/pom.xml
Modified: incubator/stanbol/trunk/enhancer/bundlelist/src/main/bundles/list.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/bundlelist/src/main/bundles/list.xml?rev=1342279&r1=1342278&r2=1342279&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/bundlelist/src/main/bundles/list.xml (original)
+++ incubator/stanbol/trunk/enhancer/bundlelist/src/main/bundles/list.xml Thu May 24 14:14:21 2012
@@ -29,14 +29,14 @@
<bundle> <!-- Apache Tika core (required by the LangId and TikaEngine) -->
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
- <version>1.0</version>
+ <version>1.1</version>
</bundle>
</startLevel>
<startLevel level="17">
<bundle> <!-- Apache Tika bundle (required by the TikaEngine) -->
<groupId>org.apache.tika</groupId>
<artifactId>tika-bundle</artifactId>
- <version>1.0</version>
+ <version>1.1</version>
</bundle>
</startLevel>
<!-- Stanbol Enhancer infrastructure and required libraries -->
Modified: incubator/stanbol/trunk/enhancer/engines/tika/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/pom.xml?rev=1342279&r1=1342278&r2=1342279&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/engines/tika/pom.xml Thu May 24 14:14:21 2012
@@ -115,7 +115,6 @@
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
</dependency>
-
<dependency>
<groupId>org.apache.felix</groupId>
<artifactId>org.apache.felix.scr.annotations</artifactId>
Modified: incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java?rev=1342279&r1=1342278&r2=1342279&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java Thu May 24 14:14:21 2012
@@ -37,6 +37,7 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.Dictionary;
import java.util.Map;
+import java.util.Set;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.commons.io.IOUtils;
@@ -171,7 +172,8 @@ public class TikaEngine
}
ParseContext context = new ParseContext();
context.set(Parser.class,parser);
- if(parser.getSupportedTypes(context).contains(plainMediaType)) {
+ Set<MediaType> supproted = parser.getSupportedTypes(context);
+ if(supproted.contains(plainMediaType)) {
final InputStream in;
if(mtas.in == null){
in = ci.getStream();
@@ -238,7 +240,7 @@ public class TikaEngine
//add the extracted metadata
if(log.isDebugEnabled()){
for(String name : metadata.names()){
- log.debug("{}: {}",name,Arrays.toString(metadata.getValues(name)));
+ log.info("{}: {}",name,Arrays.toString(metadata.getValues(name)));
}
}
ci.getLock().writeLock().lock();
Modified: incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java?rev=1342279&r1=1342278&r2=1342279&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java Thu May 24 14:14:21 2012
@@ -1,5 +1,5 @@
/*
-* Licensed to the Apache Software Foundation (ASF) under one or more
+ * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
@@ -363,6 +363,46 @@ public class TikaEngineTest {
verifyValue(ci, audioTrack, new UriRef(NamespaceEnum.media+"samplingRate"), XSD.int_, "44100");
verifyValue(ci, audioTrack, new UriRef(NamespaceEnum.media+"hasCompression"), XSD.string, "MP3");
}
+ /**
+ * Tests mappings for the Mp4 metadata extraction capabilities added to
+ * Tika 1.1 (STANBOL-627)
+ * @throws EngineException
+ * @throws IOException
+ * @throws ParseException
+ */
+ //@Test deactivated because of TIKA-852
+ public void testMp4() throws EngineException, IOException, ParseException {
+ log.info(">>> testMp4 <<<");
+ ContentItem ci = createContentItem("testMP4.m4a", "audio/mp4");
+ assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
+ engine.computeEnhancements(ci);
+ Entry<UriRef,Blob> contentPart = ContentItemHelper.getBlob(ci,
+ singleton("text/plain"));
+ assertNotNull(contentPart);
+ Blob plainTextBlob = contentPart.getValue();
+ assertNotNull(plainTextBlob);
+ assertContentRegexp(plainTextBlob,
+ "Test Title",
+ "Test Artist",
+ "Test Album");
+ //validate XHTML results
+ contentPart = ContentItemHelper.getBlob(ci,
+ singleton("application/xhtml+xml"));
+ assertNotNull(contentPart);
+ Blob xhtmlBlob = contentPart.getValue();
+ assertNotNull(xhtmlBlob);
+ //Test AudioTrack metadata
+ NonLiteral audioTrack = verifyNonLiteral(ci, new UriRef(NamespaceEnum.media+"hasTrack"));
+ //types
+ verifyValues(ci, audioTrack, RDF.type,
+ new UriRef(NamespaceEnum.media+"MediaFragment"),
+ new UriRef(NamespaceEnum.media+"Track"),
+ new UriRef(NamespaceEnum.media+"AudioTrack"));
+ //properties
+ verifyValue(ci, audioTrack, new UriRef(NamespaceEnum.media+"hasFormat"), XSD.string, "Stereo");
+ verifyValue(ci, audioTrack, new UriRef(NamespaceEnum.media+"samplingRate"), XSD.int_, "44100");
+ verifyValue(ci, audioTrack, new UriRef(NamespaceEnum.media+"hasCompression"), XSD.string, "M4A");
+ }
@Test
public void testGEOMetadata() throws EngineException, IOException, ParseException{
log.info(">>> testGEOMetadata <<<");
Added: incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP4.m4a
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP4.m4a?rev=1342279&view=auto
==============================================================================
Binary file - no diff available.
Propchange: incubator/stanbol/trunk/enhancer/engines/tika/src/test/resources/testMP4.m4a
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified: incubator/stanbol/trunk/parent/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/parent/pom.xml?rev=1342279&r1=1342278&r2=1342279&view=diff
==============================================================================
--- incubator/stanbol/trunk/parent/pom.xml (original)
+++ incubator/stanbol/trunk/parent/pom.xml Thu May 24 14:14:21 2012
@@ -1118,14 +1118,13 @@
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
- <version>1.0</version>
+ <version>1.1</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
- <version>1.0</version>
- </dependency>
-
+ <version>1.1</version>
+ </dependency>
<!-- Aperture -->
<dependency>
<groupId>org.semanticdesktop.aperture</groupId>