You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/11/26 11:57:33 UTC
svn commit: r1413551 - /stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java
Author: rwesten
Date: Mon Nov 26 10:57:32 2012
New Revision: 1413551
URL: http://svn.apache.org/viewvc?rev=1413551&view=rev
Log:
implementation for STANBOL-809 merged from the Stanbol NLP branch over to the trunk.
Modified:
stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java
Modified: stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java?rev=1413551&r1=1413550&r2=1413551&view=diff
==============================================================================
--- stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java (original)
+++ stanbol/trunk/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java Mon Nov 26 10:57:32 2012
@@ -139,6 +139,7 @@ public class TikaEngine
private ContentItemFactory ciFactory;
private static class MediaTypeAndStream {
+ String uri;
MediaType mediaType;
InputStream in;
}
@@ -265,10 +266,13 @@ public class TikaEngine
private MediaTypeAndStream extractMediaType(ContentItem ci) {
MediaTypeAndStream mtas = new MediaTypeAndStream();
mtas.mediaType = getMediaType(ci.getBlob());
+ mtas.uri = ci.getUri().getUnicodeString();
if(mtas.mediaType == null || mtas.mediaType.equals(MediaType.OCTET_STREAM)){
mtas.in = new BufferedInputStream(ci.getStream());
+ Metadata m = new Metadata();
+ m.add(Metadata.RESOURCE_NAME_KEY, mtas.uri);
try {
- mtas.mediaType = detector.detect(mtas.in, new Metadata());
+ mtas.mediaType = detector.detect(mtas.in, m);
} catch (IOException e) {
log.warn("Exception while detection the MediaType of the" +
"parsed ContentItem "+ci.getUri(),e);