You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by wk...@apache.org on 2011/06/08 09:34:43 UTC
svn commit: r1133268 -
/incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/RelatedTopicEnhancementEngine.java
Author: wkasper
Date: Wed Jun 8 07:34:43 2011
New Revision: 1133268
URL: http://svn.apache.org/viewvc?rev=1133268&view=rev
Log:
STANBOL-213: Enable use of text extracts contained in metadata
Modified:
incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/RelatedTopicEnhancementEngine.java
Modified: incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/RelatedTopicEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/RelatedTopicEnhancementEngine.java?rev=1133268&r1=1133267&r2=1133268&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/RelatedTopicEnhancementEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/autotagging/src/main/java/org/apache/stanbol/enhancer/engines/autotagging/impl/RelatedTopicEnhancementEngine.java Wed Jun 8 07:34:43 2011
@@ -16,14 +16,18 @@
*/
package org.apache.stanbol.enhancer.engines.autotagging.impl;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.NIE_PLAINTEXTCONTENT;
+
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
+import java.util.Iterator;
import java.util.List;
import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.commons.io.IOUtils;
import org.apache.felix.scr.annotations.Component;
@@ -74,11 +78,19 @@ public class RelatedTopicEnhancementEngi
+ ci.getId());
return;
}
- String text;
- try {
- text = IOUtils.toString(ci.getStream(),"UTF-8");
- } catch (IOException e) {
- throw new InvalidContentException(this, ci, e);
+ String mimeType = ci.getMimeType().split(";", 2)[0];
+ String text = "";
+ if (TEXT_PLAIN_MIMETYPE.equals(mimeType)) {
+ try {
+ text = IOUtils.toString(ci.getStream(),"UTF-8");
+ } catch (IOException e) {
+ throw new InvalidContentException(this, ci, e);
+ }
+ } else {
+ Iterator<Triple> it = ci.getMetadata().filter(new UriRef(ci.getId()), NIE_PLAINTEXTCONTENT, null);
+ while (it.hasNext()) {
+ text += it.next().getObject();
+ }
}
if (text.trim().length() == 0) {
// TODO: make the length of the data a field of the ContentItem
@@ -109,6 +121,12 @@ public class RelatedTopicEnhancementEngi
if (TEXT_PLAIN_MIMETYPE.equalsIgnoreCase(mimeType)) {
return ENHANCE_SYNCHRONOUS;
}
+ // check for existence of textual content in metadata
+ UriRef subj = new UriRef(ci.getId());
+ Iterator<Triple> it = ci.getMetadata().filter(subj, NIE_PLAINTEXTCONTENT, null);
+ if (it.hasNext()) {
+ return ENHANCE_SYNCHRONOUS;
+ }
return CANNOT_ENHANCE;
}