You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/08/21 12:57:40 UTC
svn commit: r1375468 - in
/incubator/stanbol/branches/dbpedia-spotlight-engines:
bundlelist/src/main/bundles/ engines/dbpedia-spotlight-annotate/
engines/dbpedia-spotlight-annotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate...
Author: rwesten
Date: Tue Aug 21 10:57:39 2012
New Revision: 1375468
URL: http://svn.apache.org/viewvc?rev=1375468&view=rev
Log:
STANBOL-706: updated dependencies from 0.9.0 to 0.10.0 (as in trunk); Corrected some metadata in the POM files; Several improvements to the Spot engine - support for offline mode, error handling, avoiding in-memory copies of request/response data where possible, added support for selection context, added unit test validating the generated Enhancements, added support for ENHANCE_ASYNC, ...
Added:
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementTest.java
- copied, changed from r1375110, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/core/DBPSpotlightSpotEnhancementTest.java
Removed:
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/core/DBPSpotlightSpotEnhancementTest.java
Modified:
incubator/stanbol/branches/dbpedia-spotlight-engines/bundlelist/src/main/bundles/list.xml
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/pom.xml
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/pom.xml
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/pom.xml
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementEngine.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/pom.xml
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/XMLParser.java
Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/bundlelist/src/main/bundles/list.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/bundlelist/src/main/bundles/list.xml?rev=1375468&r1=1375467&r2=1375468&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/bundlelist/src/main/bundles/list.xml (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/bundlelist/src/main/bundles/list.xml Tue Aug 21 10:57:39 2012
@@ -197,6 +197,14 @@
<artifactId>org.apache.stanbol.enhancer.engines.celi</artifactId>
<version>0.10.0-incubating-SNAPSHOT</version>
</bundle>
+
+ <bundle> <!-- DBpedia Spotlight Engines (STANBOL-706) -->
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.engines.dbpspotlight.spot</artifactId>
+ <version>0.10.0-incubating-SNAPSHOT</version>
+ </bundle>
+
+
</startLevel>
<!-- Default Configuration for the Stanbol Enhancer -->
Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/pom.xml?rev=1375468&r1=1375467&r2=1375468&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/pom.xml (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/pom.xml Tue Aug 21 10:57:39 2012
@@ -17,23 +17,29 @@
<parent>
<artifactId>org.apache.stanbol.enhancer.parent</artifactId>
<groupId>org.apache.stanbol</groupId>
- <version>0.9.0-incubating</version>
+ <version>0.10.0-incubating-SNAPSHOT</version>
<relativePath>../../parent</relativePath>
</parent>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.engines.dbpspotlight.annotate</artifactId>
+ <version>0.10.0-incubating-SNAPSHOT</version>
<packaging>bundle</packaging>
<name>Apache Stanbol Enhancer Enhancement Engine : DBPedia Spotlight Annotate</name>
- <description>just tests the Stanbol Engine Import
- </description>
+ <description></description>
- <inceptionYear>2010</inceptionYear>
+ <inceptionYear>2012</inceptionYear>
- <!--scm> <connection> scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
- </connection> <developerConnection> scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
- </developerConnection> <url>http://incubator.apache.org/stanbol/</url> </scm -->
+ <scm>
+ <connection>
+ scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/dbpedia-spotlight-annotate
+ </connection>
+ <developerConnection>
+ scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/dbpedia-spotlight-annotate
+ </developerConnection>
+ <url>http://incubator.apache.org/stanbol/</url>
+ </scm>
<build>
<plugins>
@@ -76,6 +82,7 @@
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+ <version>0.10.0-incubating-SNAPSHOT</version>
</dependency>
<dependency>
Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java?rev=1375468&r1=1375467&r2=1375468&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java Tue Aug 21 10:57:39 2012
@@ -67,7 +67,7 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
-import org.apache.stanbol.enhancer.servicesapi.helper.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.osgi.service.cm.ConfigurationException;
Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/pom.xml?rev=1375468&r1=1375467&r2=1375468&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/pom.xml (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/pom.xml Tue Aug 21 10:57:39 2012
@@ -17,22 +17,29 @@
<parent>
<artifactId>org.apache.stanbol.enhancer.parent</artifactId>
<groupId>org.apache.stanbol</groupId>
- <version>0.9.0-incubating</version>
+ <version>0.10.0-incubating-SNAPSHOT</version>
<relativePath>../../parent</relativePath>
</parent>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.engines.dbpspotlight.candidates</artifactId>
+ <version>0.10.0-incubating-SNAPSHOT</version>
<packaging>bundle</packaging>
<name>Apache Stanbol Enhancer Enhancement Engine : DBPedia Spotlight Candidates</name>
<description>an enhancement engine for associating candidate DBpedia URIs to spotted surfaceForms</description>
- <inceptionYear>2010</inceptionYear>
+ <inceptionYear>2012</inceptionYear>
- <!--scm> <connection> scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
- </connection> <developerConnection> scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
- </developerConnection> <url>http://incubator.apache.org/stanbol/</url> </scm -->
+ <scm>
+ <connection>
+ scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/dbpedia-spotlight-candidates
+ </connection>
+ <developerConnection>
+ scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/dbpedia-spotlight-candidates
+ </developerConnection>
+ <url>http://incubator.apache.org/stanbol/</url>
+ </scm>
<build>
<plugins>
@@ -75,6 +82,7 @@
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+ <version>0.10.0-incubating-SNAPSHOT</version>
</dependency>
<dependency>
Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java?rev=1375468&r1=1375467&r2=1375468&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java Tue Aug 21 10:57:39 2012
@@ -65,7 +65,7 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
-import org.apache.stanbol.enhancer.servicesapi.helper.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.osgi.service.cm.ConfigurationException;
Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/pom.xml?rev=1375468&r1=1375467&r2=1375468&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/pom.xml (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/pom.xml Tue Aug 21 10:57:39 2012
@@ -17,23 +17,30 @@
<parent>
<artifactId>org.apache.stanbol.enhancer.parent</artifactId>
<groupId>org.apache.stanbol</groupId>
- <version>0.9.0-incubating</version>
+ <version>0.10.0-incubating-SNAPSHOT</version>
<relativePath>../../parent</relativePath>
</parent>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.engines.dbpspotlight.disambiguate</artifactId>
+ <version>0.10.0-incubating-SNAPSHOT</version>
<packaging>bundle</packaging>
<name>Apache Stanbol Enhancer Enhancement Engine : DBPedia Spotlight Disambiguate</name>
<description>just tests the Stanbol Engine Import
</description>
- <inceptionYear>2010</inceptionYear>
+ <inceptionYear>2012</inceptionYear>
- <!--scm> <connection> scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
- </connection> <developerConnection> scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
- </developerConnection> <url>http://incubator.apache.org/stanbol/</url> </scm -->
+ <scm>
+ <connection>
+ scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/dbpedia-spotlight-disambiguate
+ </connection>
+ <developerConnection>
+ scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/dbpedia-spotlight-disambiguate
+ </developerConnection>
+ <url>http://incubator.apache.org/stanbol/</url>
+ </scm>
<build>
<plugins>
@@ -76,6 +83,7 @@
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+ <version>0.10.0-incubating-SNAPSHOT</version>
</dependency>
<dependency>
Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementEngine.java?rev=1375468&r1=1375467&r2=1375468&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementEngine.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementEngine.java Tue Aug 21 10:57:39 2012
@@ -66,7 +66,7 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
-import org.apache.stanbol.enhancer.servicesapi.helper.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/pom.xml?rev=1375468&r1=1375467&r2=1375468&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/pom.xml (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/pom.xml Tue Aug 21 10:57:39 2012
@@ -17,22 +17,29 @@
<parent>
<artifactId>org.apache.stanbol.enhancer.parent</artifactId>
<groupId>org.apache.stanbol</groupId>
- <version>0.9.0-incubating</version>
+ <version>0.10.0-incubating-SNAPSHOT</version>
<relativePath>../../parent</relativePath>
</parent>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.engines.dbpspotlight.spot</artifactId>
+ <version>0.10.0-incubating-SNAPSHOT</version>
<packaging>bundle</packaging>
<name>Apache Stanbol Enhancer Enhancement Engine : DBPedia Spotlight Spot</name>
<description>an enhancement engine for spotting</description>
- <inceptionYear>2010</inceptionYear>
+ <inceptionYear>2012</inceptionYear>
- <!--scm> <connection> scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
- </connection> <developerConnection> scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
- </developerConnection> <url>http://incubator.apache.org/stanbol/</url> </scm -->
+ <scm>
+ <connection>
+ scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/dbpedia-spotlight-spot
+ </connection>
+ <developerConnection>
+ scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/dbpedia-spotlight-spot
+ </developerConnection>
+ <url>http://incubator.apache.org/stanbol/</url>
+ </scm>
<build>
<plugins>
@@ -72,15 +79,22 @@
</properties>
<dependencies>
+ <dependency><!-- to ensure deactivation in offline mode -->
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.commons.stanboltools.offline</artifactId>
+ <version>0.9.0-incubating</version>
+ <scope>provided</scope>
+ </dependency>
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+ <version>0.10.0-incubating-SNAPSHOT</version>
</dependency>
- <dependency>
+<!-- <dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
- </dependency>
+ </dependency> -->
<dependency>
<groupId>org.apache.felix</groupId>
@@ -99,6 +113,19 @@
<artifactId>slf4j-api</artifactId>
</dependency>
+ <!-- test -->
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.test</artifactId>
+ <version>0.10.0-incubating-SNAPSHOT</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.core</artifactId>
+ <version>0.10.0-incubating-SNAPSHOT</version>
+ <scope>test</scope>
+ </dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java?rev=1375468&r1=1375467&r2=1375468&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java Tue Aug 21 10:57:39 2012
@@ -16,30 +16,32 @@
*/
package org.apache.stanbol.enhancer.engines.dbpspotlight.spot;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.spot.XMLParser.getElementsByTagName;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.spot.XMLParser.loadXMLFromInputStream;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_CONTEXT;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
-import java.io.BufferedReader;
+import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
-import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
+import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
+import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Dictionary;
import java.util.HashMap;
import java.util.HashSet;
-import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
@@ -48,26 +50,28 @@ import org.apache.clerezza.rdf.core.Lang
import org.apache.clerezza.rdf.core.Literal;
import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.rdf.core.MGraph;
-import org.apache.clerezza.rdf.core.NonLiteral;
import org.apache.clerezza.rdf.core.Resource;
-import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
import org.apache.clerezza.rdf.core.serializedform.Serializer;
+import org.apache.commons.io.IOUtils;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Properties;
import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.Service;
+import org.apache.stanbol.commons.stanboltools.offline.OfflineMode;
+import org.apache.stanbol.commons.stanboltools.offline.OnlineMode;
import org.apache.stanbol.enhancer.servicesapi.Blob;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
-import org.apache.stanbol.enhancer.servicesapi.helper.AbstractEnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
@@ -75,6 +79,7 @@ import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
/**
* {@link DBPSpotlightSpotEnhancementEngine} provides functionality to enhance
@@ -84,11 +89,21 @@ import org.w3c.dom.NodeList;
*/
@Component(metatype = true, immediate = true, label = "%stanbol.DBPSpotlightSpotEnhancementEngine.name", description = "%stanbol.DBPSpotlightSpotEnhancementEngine.description")
@Service
-@Properties(value = { @Property(name = EnhancementEngine.PROPERTY_NAME, value = "dbpspotlightspot") })
+@Properties(value = {
+ @Property(name = EnhancementEngine.PROPERTY_NAME, value = "dbpspotlightspot")
+})
public class DBPSpotlightSpotEnhancementEngine extends
AbstractEnhancementEngine<IOException, RuntimeException> implements
EnhancementEngine, ServiceProperties {
+ private static final Charset UTF8 = Charset.forName("UTF-8");
+
+ /**
+ * Ensures this engine is deactivated in {@link OfflineMode}
+ */
+ @Reference
+ OnlineMode onlineMode;
+
/**
* a configurable value of the text segment length to check
*/
@@ -100,7 +115,7 @@ public class DBPSpotlightSpotEnhancement
/**
* The default value for the Execution of this Engine. Currently set to
- * {@link ServiceProperties#ORDERING_PRE_PROCESSING}
+ * <code>{@link ServiceProperties#ORDERING_CONTENT_EXTRACTION} - 29</code>
*/
public static final Integer defaultOrder = ORDERING_CONTENT_EXTRACTION - 29;
@@ -128,11 +143,21 @@ public class DBPSpotlightSpotEnhancement
.getLogger(DBPSpotlightSpotEnhancementEngine.class);
/** holds the url of the Spotlight REST endpoint */
- private String spotlightUrl;
+ private URL spotlightUrl;
/** holds the chosen of spotter to be used */
private String spotlightSpotter;
/**
+ * Default constructor used by OSGI
+ */
+ public DBPSpotlightSpotEnhancementEngine(){}
+
+ protected DBPSpotlightSpotEnhancementEngine(URL spotlightUrl, String spotlightSpotter){
+ this.spotlightUrl = spotlightUrl;
+ this.spotlightSpotter = spotlightSpotter;
+ }
+
+ /**
* Initialize all parameters from the configuration panel, or with their
* default values
*
@@ -146,10 +171,24 @@ public class DBPSpotlightSpotEnhancement
super.activate(ce);
Dictionary<String, Object> properties = ce.getProperties();
- spotlightUrl = properties.get(SL_URL_KEY) == null ? "http://spotlight.dbpedia.org/rest/spot"
- : (String) properties.get(SL_URL_KEY);
- spotlightSpotter = properties.get(SL_SPOTTER) == null ? null
- : (String) properties.get(SL_SPOTTER);
+ Object value = properties.get(SL_URL_KEY);
+ if(value == null || value.toString().isEmpty()){
+ throw new ConfigurationException(SL_URL_KEY, "The URL with the DBpedia "
+ + "Spotlight Spot RESTful Service MUST NOT be NULL nor empty!");
+ } else {
+ String url = (String) properties.get(SL_URL_KEY);
+ try {
+ this.spotlightUrl = new URL(url);
+ } catch (MalformedURLException e) {
+ throw new ConfigurationException(SL_URL_KEY, "The parsed URL for the "
+ + "DBpedia Spotlight Spot RESTful Service is illegal formatted!",
+ e);
+ }
+ }
+ //also set the spotter to null if an empty string is parsed
+ value = properties.get(SL_SPOTTER);
+ spotlightSpotter = value != null && !value.toString().isEmpty() ?
+ value.toString() : null;
}
/**
@@ -160,18 +199,22 @@ public class DBPSpotlightSpotEnhancement
*/
public int canEnhance(ContentItem ci) throws EngineException {
if (ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) != null) {
- String language = getMetadataLanguage(ci.getMetadata(), null);
- if (language != null && !SUPPORTED_LANGUAGES.contains(language)) {
- log.info(
- "DBpedia Spotlight can not process ContentItem {} because "
- + "language {} is not supported (supported: {})",
- new Object[] { ci.getUri(), language,
- SUPPORTED_LANGUAGES });
+ String language = EnhancementEngineHelper.getLanguage(ci);
+ if (!SUPPORTED_LANGUAGES.contains(language)) {
+ log.info("DBpedia Spotlight can not process ContentItem {} "
+ + "because language {} is not supported (supported: {})",
+ new Object[] { ci.getUri(), language, SUPPORTED_LANGUAGES });
return CANNOT_ENHANCE;
}
- return ENHANCE_SYNCHRONOUS;
+ //rwesten: ASYNC support is highly recommended for engines that
+ // do call remote services
+ return ENHANCE_ASYNC;
+ } else {
+ log.info("DBpedia Spotlight can not process ContentItem {} "
+ + "because it does not have 'plain/text' content",
+ ci.getUri());
+ return CANNOT_ENHANCE;
}
- return CANNOT_ENHANCE;
}
/**
@@ -182,17 +225,28 @@ public class DBPSpotlightSpotEnhancement
* the {@link ContentItem}
*/
public void computeEnhancements(ContentItem ci) throws EngineException {
+ Language language;
+ String lang = EnhancementEngineHelper.getLanguage(ci);
+ if(!SUPPORTED_LANGUAGES.contains(lang)){
+ throw new IllegalStateException("Langage '"+lang
+ + "' as annotated for ContentItem "
+ + ci.getUri() + " is not supported by this Engine: "
+ + "This is also checked in the canEnhance method! -> This "
+ + "indicated an Bug in the implementation of the "
+ + "EnhancementJobManager!");
+ } else {
+ language = lang == null || lang.isEmpty() ? null : new Language(lang);
+ }
Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(ci,
SUPPORTED_MIMTYPES);
if (contentPart == null) {
- throw new IllegalStateException(
- "No ContentPart with Mimetype '"
- + TEXT_PLAIN_MIMETYPE
- + "' found for ContentItem "
- + ci.getUri()
- + ": This is also checked in the canEnhance method! -> This "
- + "indicated an Bug in the implementation of the "
- + "EnhancementJobManager!");
+ throw new IllegalStateException("No ContentPart with Mimetype '"
+ + TEXT_PLAIN_MIMETYPE
+ + "' found for ContentItem "
+ + ci.getUri()
+ + ": This is also checked in the canEnhance method! -> This "
+ + "indicated an Bug in the implementation of the "
+ + "EnhancementJobManager!");
}
String text = "";
try {
@@ -201,13 +255,13 @@ public class DBPSpotlightSpotEnhancement
throw new InvalidContentException(this, ci, e);
}
- Collection<SurfaceForm> dbpslGraph = doPostRequest(text);
+ Collection<SurfaceForm> dbpslGraph = doPostRequest(text,ci.getUri());
if (dbpslGraph != null) {
// Acquire a write lock on the ContentItem when adding the
// enhancements
ci.getLock().writeLock().lock();
try {
- createEnhancements(dbpslGraph, ci);
+ createEnhancements(dbpslGraph, ci,text,language);
if (log.isDebugEnabled()) {
Serializer serializer = Serializer.getInstance();
ByteArrayOutputStream debugStream = new ByteArrayOutputStream();
@@ -235,28 +289,18 @@ public class DBPSpotlightSpotEnhancement
* @param ci
* the content item
*/
- public void createEnhancements(Collection<SurfaceForm> occs,
- ContentItem ci) {
+ protected void createEnhancements(Collection<SurfaceForm> occs,
+ ContentItem ci, String content, Language lang) {
LiteralFactory literalFactory = LiteralFactory.getInstance();
- final Language language; // used for plain literals representing parts
- // fo the content
- String langString = getMetadataLanguage(ci.getMetadata(), null);
-
- if (langString != null && !langString.isEmpty()) {
- language = new Language(langString);
- } else {
- language = null;
- }
HashMap<String, UriRef> entityAnnotationMap = new HashMap<String, UriRef>();
+ MGraph model = ci.getMetadata();
for (SurfaceForm occ : occs) {
UriRef textAnnotation = EnhancementEngineHelper
.createTextEnhancement(ci, this);
- MGraph model = ci.getMetadata();
-
model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,
- new PlainLiteralImpl(occ.name, language)));
+ new PlainLiteralImpl(occ.name, lang)));
model.add(new TripleImpl(textAnnotation, ENHANCER_START,
literalFactory.createTypedLiteral(occ.offset)));
model.add(new TripleImpl(textAnnotation, ENHANCER_END,
@@ -264,10 +308,10 @@ public class DBPSpotlightSpotEnhancement
+ occ.name.length())));
model.add(new TripleImpl(textAnnotation, DC_TYPE, new UriRef(
occ.type)));
- // TODO ################## model.add(new TripleImpl(textAnnotation,
- // ENHANCER_SELECTION_CONTEXT, new
- // PlainLiteralImpl(occ.context,language)));
-
+ model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT,
+ new PlainLiteralImpl(
+ getSelectionContext(content, occ.name, occ.offset),
+ lang)));
if (entityAnnotationMap.containsKey(occ.name)) {
model.add(new TripleImpl(entityAnnotationMap.get(occ.name),
DC_RELATION, textAnnotation));
@@ -282,32 +326,21 @@ public class DBPSpotlightSpotEnhancement
*
* @param text
* a <code>String</code> with the text to be analyzed
+ * @param contentItemUri
+ * the URI of the ContentItem (only used for logging)
* @return a <code>String</code> with the server response
* @throws EngineException
* if the request cannot be sent
*/
- public Collection<SurfaceForm> doPostRequest(String text)
+ protected Collection<SurfaceForm> doPostRequest(String text,UriRef contentItemUri)
throws EngineException {
- StringBuilder data = new StringBuilder();
- try {
- if (spotlightSpotter != null && !spotlightSpotter.isEmpty())
- data.append(URLEncoder.encode("spotter", "UTF-8") + "="
- + URLEncoder.encode(spotlightSpotter, "UTF-8") + "&");
- data.append(URLEncoder.encode("text", "UTF-8") + "="
- + URLEncoder.encode(text, "UTF-8"));
- } catch (UnsupportedEncodingException e) {
- throw new EngineException(
- "Data for the httprequest could not be converted. Error: "
- + e.getMessage());
- }
-
+ //rwesten: reimplemented this so that the request body
+ // is written directly to the connection instead
+ // of storing the data in an in-memory StringBuilder
HttpURLConnection connection = null;
- StringBuffer response = new StringBuffer();
-
+ BufferedWriter wr = null;
try {
- // Create connection
- URL url = new URL(spotlightUrl);
- connection = (HttpURLConnection) url.openConnection();
+ connection = (HttpURLConnection) spotlightUrl.openConnection();
connection.setRequestMethod("POST");
connection.setRequestProperty("Content-Type",
"application/x-www-form-urlencoded");
@@ -318,51 +351,63 @@ public class DBPSpotlightSpotEnhancement
connection.setDoOutput(true);
// Send request
- DataOutputStream wr = new DataOutputStream(
- connection.getOutputStream());
- wr.writeBytes(data.toString());
- wr.flush();
- wr.close();
-
- // Get Response
- InputStream is = connection.getInputStream();
- BufferedReader rd = new BufferedReader(new InputStreamReader(is));
- String line;
- while ((line = rd.readLine()) != null) {
- response.append(line);
- response.append('\r');
+ wr = new BufferedWriter(new OutputStreamWriter(
+ connection.getOutputStream(),UTF8));
+ } catch (IOException e) {
+ IOUtils.closeQuietly(wr);
+ throw new EngineException("Unable to open connection to "+
+ spotlightUrl,e);
+ }
+ try {
+ if (spotlightSpotter != null && !spotlightSpotter.isEmpty()) {
+ wr.write("spotter=");
+ wr.write(URLEncoder.encode(spotlightSpotter, UTF8.name()));
+ wr.write('&');
}
- rd.close();
-
- } catch (Exception e) {
-
- log.error("[request] Request could not be made. Error: "
- + e.getMessage());
- e.printStackTrace();
- return null;
-
+ wr.write("text=");
+ //now append the URL encoded text
+ //TODO: This will load the URLEncoded variant in-memory.
+ // One could avoid that by encoding the data in smaller
+ // pieces, but using URLEncoding for big data is anyway
+ // very inefficient. So instead of fixing this issue here
+ // DBpedia Spotlight should support "multipart/form-data"
+ // instead.
+ // As soon as this is supported this should be re-implemented
+ // to support streaming.
+ wr.write(URLEncoder.encode(text, UTF8.name()));
+ } catch (UnsupportedEncodingException e) {
+ throw new IllegalStateException(
+ "The platform does not support encoding " + UTF8.name(),e);
+ } catch (IOException e) {
+ throw new EngineException("Unable to write 'plain/text' content "
+ + "for ContentItem "+contentItemUri+" to "
+ + spotlightUrl,e);
} finally {
-
- if (connection != null) {
- connection.disconnect();
- }
+ IOUtils.closeQuietly(wr);
}
-
- XMLParser xmlParser = new XMLParser();
+ // rwesten: reimplemented this to read the XML
+ // Document directly from the response
+ InputStream is = null;
+ Document xmlDoc;
try {
- Document xmlDoc = xmlParser.loadXMLFromString(response.toString());
- NodeList nlist = xmlParser.getElementsByTagName(xmlDoc,
- "surfaceForm");
- Collection<SurfaceForm> annos = this.getAnnotations(nlist);
-
- return annos;
- } catch (Exception e) {
- log.error("[response] Response XML could not be parsed. Error: "
- + e.getMessage());
- throw new EngineException(
- "Response XML could not be parsed. Error: "
- + e.getMessage());
+ // Get Response
+ is = connection.getInputStream();
+ xmlDoc = loadXMLFromInputStream(is);
+ } catch (IOException e) {
+ throw new EngineException("Unable to spot Entities with"
+ + "Dbpedia Spotlight Spot RESTful Serice running at "
+ + spotlightUrl,e);
+ } catch(SAXException e) {
+ throw new EngineException("Unable to parse Response from "
+ + "Dbpedia Spotlight Spot RESTful Serice running at "
+ + spotlightUrl,e);
+ } finally {
+ IOUtils.closeQuietly(is);
}
+ //rwesten: commented out the disconnect to allow keep-alive
+ //connection.disconnect();
+ NodeList nlist = getElementsByTagName(xmlDoc,"surfaceForm");
+ return getAnnotations(nlist);
}
/**
@@ -396,15 +441,15 @@ public class DBPSpotlightSpotEnhancement
return Collections.unmodifiableMap(Collections.singletonMap(
ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder));
}
-
- public String getMetadataLanguage(MGraph model, NonLiteral subj) {
- Iterator<Triple> it = model.filter(subj, DC_LANGUAGE, null);
- if (it.hasNext()) {
- Resource langNode = it.next().getObject();
- return getLexicalForm(langNode);
- }
- return null;
- }
+// rwesten: Use the Utility provided by the EnhancementEngineHelper instead
+// public String getMetadataLanguage(MGraph model, NonLiteral subj) {
+// Iterator<Triple> it = model.filter(subj, DC_LANGUAGE, null);
+// if (it.hasNext()) {
+// Resource langNode = it.next().getObject();
+// return getLexicalForm(langNode);
+// }
+// return null;
+// }
public String getLexicalForm(Resource res) {
if (res == null) {
@@ -415,15 +460,38 @@ public class DBPSpotlightSpotEnhancement
return res.toString();
}
}
-
- /**
- * This method is used by the test class to set the endpoint url
- *
- * @param url
- * String the url of the Spotlight endpoint
- */
- public void setEndpointUrl(String url) {
- spotlightUrl = url;
- }
-
+
+ private static final int DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE = 50;
+ /**
+ * Extracts the selection context based on the content, selection and
+ * the start char offset of the selection
+ * @param content the content
+ * @param selection the selected text
+ * @param selectionStartPos the start char position of the selection
+ * @return the context
+ */
+ public static String getSelectionContext(String content, String selection,int selectionStartPos){
+ //extract the selection context
+ int beginPos;
+ if(selectionStartPos <= DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE){
+ beginPos = 0;
+ } else {
+ int start = selectionStartPos-DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
+ beginPos = content.indexOf(' ',start);
+ if(beginPos < 0 || beginPos >= selectionStartPos){ //no words
+ beginPos = start; //begin within a word
+ }
+ }
+ int endPos;
+ if(selectionStartPos+selection.length()+DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE >= content.length()){
+ endPos = content.length();
+ } else {
+ int start = selectionStartPos+selection.length()+DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
+ endPos = content.lastIndexOf(' ', start);
+ if(endPos <= selectionStartPos+selection.length()){
+ endPos = start; //end within a word;
+ }
+ }
+ return content.substring(beginPos, endPos);
+ }
}
Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/XMLParser.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/XMLParser.java?rev=1375468&r1=1375467&r2=1375468&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/XMLParser.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/XMLParser.java Tue Aug 21 10:57:39 2012
@@ -34,15 +34,19 @@ import org.xml.sax.SAXException;
*
* @author <a href="mailto:iavor.jelev@babelmonkeys.com">Iavor Jelev</a>
*/
+public final class XMLParser {
-public class XMLParser {
-
- public NodeList getElementsByTagName(Document doc, String tagName) {
+ /**
+ * Do not create instances of Utility Classes
+ */
+ private XMLParser(){};
+
+ public static NodeList getElementsByTagName(Document doc, String tagName) {
return doc.getElementsByTagName(tagName);
}
- public Document loadXMLFromString(String xml) throws SAXException,
+ public static Document loadXMLFromString(String xml) throws SAXException,
IOException {
Document doc = loadXMLFromInputStream(new ByteArrayInputStream(
xml.getBytes()));
@@ -51,7 +55,7 @@ public class XMLParser {
return doc;
}
- public Document loadXMLFromInputStream(InputStream is) throws SAXException,
+ public static Document loadXMLFromInputStream(InputStream is) throws SAXException,
IOException {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
@@ -67,7 +71,7 @@ public class XMLParser {
return doc;
}
- public Document loadXMLFromFile(String filePath)
+ public static Document loadXMLFromFile(String filePath)
throws ParserConfigurationException, SAXException, IOException {
File fXmlFile = new File(filePath);
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
Copied: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementTest.java (from r1375110, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/core/DBPSpotlightSpotEnhancementTest.java)
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementTest.java?p2=incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementTest.java&p1=incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/core/DBPSpotlightSpotEnhancementTest.java&r1=1375110&r2=1375468&rev=1375468&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/core/DBPSpotlightSpotEnhancementTest.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementTest.java Tue Aug 21 10:57:39 2012
@@ -14,13 +14,38 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.stanbol.enhancer.engines.dbpspotlight.spot.core;
+package org.apache.stanbol.enhancer.engines.dbpspotlight.spot;
-import java.util.Collection;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.spot.DBPSpotlightSpotEnhancementEngine.SL_URL_KEY;
+import static org.apache.stanbol.enhancer.servicesapi.EnhancementEngine.ENHANCE_ASYNC;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import java.net.URL;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map.Entry;
+
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
import org.apache.stanbol.enhancer.engines.dbpspotlight.spot.DBPSpotlightSpotEnhancementEngine;
import org.apache.stanbol.enhancer.engines.dbpspotlight.spot.SurfaceForm;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -42,30 +67,58 @@ public class DBPSpotlightSpotEnhancement
private static final Logger LOG = LoggerFactory
.getLogger(DBPSpotlightSpotEnhancementTest.class);
private static String SPL_URL = System
- .getProperty(DBPSpotlightSpotEnhancementEngine.SL_URL_KEY) == null ? "http://spotlight.dbpedia.org/rest/spot"
- : (String) System
- .getProperty(DBPSpotlightSpotEnhancementEngine.SL_URL_KEY);
+ .getProperty(SL_URL_KEY) == null ?
+ "http://spotlight.dbpedia.org/rest/spot" :
+ (String) System.getProperty(SL_URL_KEY);
private static String TEST_TEXT = "President Obama is meeting Angela Merkel in Berlin on Monday";
private static DBPSpotlightSpotEnhancementEngine dbpslight;
+ private static ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();
+
+ private static ContentItem ci;
+ private static Entry<UriRef, Blob> textContentPart;
+
@BeforeClass
- public static void oneTimeSetup() throws ConfigurationException {
- dbpslight = new DBPSpotlightSpotEnhancementEngine();
- dbpslight.setEndpointUrl(SPL_URL);
+ public static void oneTimeSetup() throws Exception {
+ //create the contentItem for testing
+ ci = ciFactory.createContentItem(new StringSource(TEST_TEXT));
+ assertNotNull(ci);
+ textContentPart = ContentItemHelper.getBlob(ci, Collections.singleton("text/plain"));
+ assertNotNull(textContentPart);
+ //add the language of the text
+ ci.getMetadata().add(new TripleImpl(ci.getUri(), Properties.DC_LANGUAGE,
+ new PlainLiteralImpl("en")));
+ assertEquals("en", EnhancementEngineHelper.getLanguage(ci));
+ //and the enhancement engine instance
+ dbpslight = new DBPSpotlightSpotEnhancementEngine(new URL(SPL_URL),null);
}
@Test
- public void testEntityExtraction() {
+ public void testEntityExtraction() throws Exception {
Collection<SurfaceForm> entities;
- try {
- entities = dbpslight.doPostRequest(TEST_TEXT);
- LOG.info("Found entities: {}", entities.size());
- LOG.debug("Entities:\n{}", entities);
- Assert.assertFalse("No entities were found!", entities.isEmpty());
- } catch (EngineException e) {
- Assert.assertFalse("An EngineException occurred! The message was: "
- + e.getMessage(), true);
- }
+ entities = dbpslight.doPostRequest(TEST_TEXT,ci.getUri());
+ LOG.info("Found entities: {}", entities.size());
+ LOG.debug("Entities:\n{}", entities);
+ assertFalse("No entities were found!", entities.isEmpty());
}
+ @Test
+ public void testCanEnhance() throws EngineException {
+ assertEquals(ENHANCE_ASYNC, dbpslight.canEnhance(ci));
+ }
+
+ /**
+ * Validates the Enhancements created by this engine
+ * @throws EngineException
+ */
+ @Test
+ public void testEnhancement() throws EngineException {
+ dbpslight.computeEnhancements(ci);
+ HashMap<UriRef,Resource> expectedValues = new HashMap<UriRef,Resource>();
+ expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
+ expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(
+ dbpslight.getClass().getName()));
+ EnhancementStructureHelper.validateAllTextAnnotations(
+ ci.getMetadata(), TEST_TEXT, expectedValues);
+ }
}