You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/11/26 11:57:33 UTC

svn commit: r1413551 - /stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java

Author: rwesten
Date: Mon Nov 26 10:57:32 2012
New Revision: 1413551

URL: http://svn.apache.org/viewvc?rev=1413551&view=rev
Log:
Implementation for STANBOL-809, merged from the Stanbol NLP branch into trunk.

Modified:
    stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java

Modified: stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java?rev=1413551&r1=1413550&r2=1413551&view=diff
==============================================================================
--- stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java (original)
+++ stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java Mon Nov 26 10:57:32 2012
@@ -139,6 +139,7 @@ public class TikaEngine 
     private ContentItemFactory ciFactory;
     
     private static class MediaTypeAndStream {
+        String uri;
         MediaType mediaType;
         InputStream in;
     }
@@ -265,10 +266,13 @@ public class TikaEngine 
     private MediaTypeAndStream extractMediaType(ContentItem ci) {
         MediaTypeAndStream mtas = new MediaTypeAndStream();
         mtas.mediaType = getMediaType(ci.getBlob());
+        mtas.uri = ci.getUri().getUnicodeString();
         if(mtas.mediaType == null || mtas.mediaType.equals(MediaType.OCTET_STREAM)){
             mtas.in = new BufferedInputStream(ci.getStream());
+            Metadata m = new Metadata();
+            m.add(Metadata.RESOURCE_NAME_KEY, mtas.uri);
             try {
-                mtas.mediaType = detector.detect(mtas.in, new Metadata());
+                mtas.mediaType = detector.detect(mtas.in, m);
             } catch (IOException e) {
                 log.warn("Exception while detection the MediaType of the" +
                         "parsed ContentItem "+ci.getUri(),e);