You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/08/21 12:57:40 UTC

svn commit: r1375468 - in /incubator/stanbol/branches/dbpedia-spotlight-engines: bundlelist/src/main/bundles/ engines/dbpedia-spotlight-annotate/ engines/dbpedia-spotlight-annotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate...

Author: rwesten
Date: Tue Aug 21 10:57:39 2012
New Revision: 1375468

URL: http://svn.apache.org/viewvc?rev=1375468&view=rev
Log:
STANBOL-706: updated dependencies from 0.9.0 to 0.10.0 (as in trunk); Corrected some metadata in the POM files; Several improvements to the Spot engine - support for offline mode, error handling, avoiding in-memory copies of request/response data where possible, added support for selection context, added unit test validating the generated Enhancements, added support for ENHANCE_ASYNC, ...

Added:
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementTest.java
      - copied, changed from r1375110, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/core/DBPSpotlightSpotEnhancementTest.java
Removed:
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/core/DBPSpotlightSpotEnhancementTest.java
Modified:
    incubator/stanbol/branches/dbpedia-spotlight-engines/bundlelist/src/main/bundles/list.xml
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/pom.xml
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/pom.xml
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/pom.xml
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementEngine.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/pom.xml
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/XMLParser.java

Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/bundlelist/src/main/bundles/list.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/bundlelist/src/main/bundles/list.xml?rev=1375468&r1=1375467&r2=1375468&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/bundlelist/src/main/bundles/list.xml (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/bundlelist/src/main/bundles/list.xml Tue Aug 21 10:57:39 2012
@@ -197,6 +197,14 @@
 	  <artifactId>org.apache.stanbol.enhancer.engines.celi</artifactId>
       <version>0.10.0-incubating-SNAPSHOT</version>
     </bundle>
+    
+    <bundle> <!-- DBpedia Spotlight Engines (STANBOL-706) -->
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.engines.dbpspotlight.spot</artifactId>
+      <version>0.10.0-incubating-SNAPSHOT</version>
+    </bundle>
+    
+    
   </startLevel>
   
   <!-- Default Configuration for the Stanbol Enhancer -->

Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/pom.xml?rev=1375468&r1=1375467&r2=1375468&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/pom.xml (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/pom.xml Tue Aug 21 10:57:39 2012
@@ -17,23 +17,29 @@
 	<parent>
 		<artifactId>org.apache.stanbol.enhancer.parent</artifactId>
 		<groupId>org.apache.stanbol</groupId>
-		<version>0.9.0-incubating</version>
+        <version>0.10.0-incubating-SNAPSHOT</version>
 		<relativePath>../../parent</relativePath>
 	</parent>
 
 	<groupId>org.apache.stanbol</groupId>
 	<artifactId>org.apache.stanbol.enhancer.engines.dbpspotlight.annotate</artifactId>
+    <version>0.10.0-incubating-SNAPSHOT</version>
 	<packaging>bundle</packaging>
 
 	<name>Apache Stanbol Enhancer Enhancement Engine : DBPedia Spotlight Annotate</name>
-	<description>just tests the Stanbol Engine Import
-  </description>
+	<description></description>
 
-	<inceptionYear>2010</inceptionYear>
+	<inceptionYear>2012</inceptionYear>
 
-	<!--scm> <connection> scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/ 
-		</connection> <developerConnection> scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/ 
-		</developerConnection> <url>http://incubator.apache.org/stanbol/</url> </scm -->
+	<scm> 
+	  <connection>
+	    scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/dbpedia-spotlight-annotate 
+	  </connection>
+	  <developerConnection> 
+	    scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/dbpedia-spotlight-annotate
+	  </developerConnection>
+	  <url>http://incubator.apache.org/stanbol/</url>
+	</scm>
 
 	<build>
 		<plugins>
@@ -76,6 +82,7 @@
 		<dependency>
 			<groupId>org.apache.stanbol</groupId>
 			<artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+			<version>0.10.0-incubating-SNAPSHOT</version>
 		</dependency>
 
 		<dependency>

Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java?rev=1375468&r1=1375467&r2=1375468&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java Tue Aug 21 10:57:39 2012
@@ -67,7 +67,7 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
-import org.apache.stanbol.enhancer.servicesapi.helper.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.osgi.service.cm.ConfigurationException;

Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/pom.xml?rev=1375468&r1=1375467&r2=1375468&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/pom.xml (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/pom.xml Tue Aug 21 10:57:39 2012
@@ -17,22 +17,29 @@
 	<parent>
 		<artifactId>org.apache.stanbol.enhancer.parent</artifactId>
 		<groupId>org.apache.stanbol</groupId>
-		<version>0.9.0-incubating</version>
+        <version>0.10.0-incubating-SNAPSHOT</version>
 		<relativePath>../../parent</relativePath>
 	</parent>
 
 	<groupId>org.apache.stanbol</groupId>
 	<artifactId>org.apache.stanbol.enhancer.engines.dbpspotlight.candidates</artifactId>
+    <version>0.10.0-incubating-SNAPSHOT</version>
 	<packaging>bundle</packaging>
 
 	<name>Apache Stanbol Enhancer Enhancement Engine : DBPedia Spotlight Candidates</name>
 	<description>an enhancement engine for associating candidate DBpedia URIs to spotted surfaceForms</description>
 
-	<inceptionYear>2010</inceptionYear>
+	<inceptionYear>2012</inceptionYear>
 
-	<!--scm> <connection> scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/ 
-		</connection> <developerConnection> scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/ 
-		</developerConnection> <url>http://incubator.apache.org/stanbol/</url> </scm -->
+	<scm> 
+	  <connection>
+	    scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/dbpedia-spotlight-candidates 
+	  </connection>
+	  <developerConnection> 
+	    scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/dbpedia-spotlight-candidates 
+	  </developerConnection>
+	  <url>http://incubator.apache.org/stanbol/</url>
+	</scm>
 
 	<build>
 		<plugins>
@@ -75,6 +82,7 @@
 		<dependency>
 			<groupId>org.apache.stanbol</groupId>
 			<artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+			<version>0.10.0-incubating-SNAPSHOT</version>
 		</dependency>
 
 		<dependency>

Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java?rev=1375468&r1=1375467&r2=1375468&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java Tue Aug 21 10:57:39 2012
@@ -65,7 +65,7 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
-import org.apache.stanbol.enhancer.servicesapi.helper.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.osgi.service.cm.ConfigurationException;

Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/pom.xml?rev=1375468&r1=1375467&r2=1375468&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/pom.xml (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/pom.xml Tue Aug 21 10:57:39 2012
@@ -17,23 +17,30 @@
 	<parent>
 		<artifactId>org.apache.stanbol.enhancer.parent</artifactId>
 		<groupId>org.apache.stanbol</groupId>
-		<version>0.9.0-incubating</version>
+        <version>0.10.0-incubating-SNAPSHOT</version>
 		<relativePath>../../parent</relativePath>
 	</parent>
 
 	<groupId>org.apache.stanbol</groupId>
 	<artifactId>org.apache.stanbol.enhancer.engines.dbpspotlight.disambiguate</artifactId>
+    <version>0.10.0-incubating-SNAPSHOT</version>
 	<packaging>bundle</packaging>
 
 	<name>Apache Stanbol Enhancer Enhancement Engine : DBPedia Spotlight Disambiguate</name>
 	<description>just tests the Stanbol Engine Import
   </description>
 
-	<inceptionYear>2010</inceptionYear>
+	<inceptionYear>2012</inceptionYear>
 
-	<!--scm> <connection> scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/ 
-		</connection> <developerConnection> scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/ 
-		</developerConnection> <url>http://incubator.apache.org/stanbol/</url> </scm -->
+	<scm> 
+	  <connection>
+	    scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/dbpedia-spotlight-disambiguate 
+	  </connection>
+	  <developerConnection> 
+	    scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/dbpedia-spotlight-disambiguate
+	  </developerConnection>
+	  <url>http://incubator.apache.org/stanbol/</url>
+	</scm>
 
 	<build>
 		<plugins>
@@ -76,6 +83,7 @@
 		<dependency>
 			<groupId>org.apache.stanbol</groupId>
 			<artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+			<version>0.10.0-incubating-SNAPSHOT</version>
 		</dependency>
 
 		<dependency>

Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementEngine.java?rev=1375468&r1=1375467&r2=1375468&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementEngine.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementEngine.java Tue Aug 21 10:57:39 2012
@@ -66,7 +66,7 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
-import org.apache.stanbol.enhancer.servicesapi.helper.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;

Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/pom.xml?rev=1375468&r1=1375467&r2=1375468&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/pom.xml (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/pom.xml Tue Aug 21 10:57:39 2012
@@ -17,22 +17,29 @@
 	<parent>
 		<artifactId>org.apache.stanbol.enhancer.parent</artifactId>
 		<groupId>org.apache.stanbol</groupId>
-		<version>0.9.0-incubating</version>
+		<version>0.10.0-incubating-SNAPSHOT</version>
 		<relativePath>../../parent</relativePath>
 	</parent>
 
 	<groupId>org.apache.stanbol</groupId>
 	<artifactId>org.apache.stanbol.enhancer.engines.dbpspotlight.spot</artifactId>
+    <version>0.10.0-incubating-SNAPSHOT</version>
 	<packaging>bundle</packaging>
 
 	<name>Apache Stanbol Enhancer Enhancement Engine : DBPedia Spotlight Spot</name>
 	<description>an enhancement engine for spotting</description>
 
-	<inceptionYear>2010</inceptionYear>
+	<inceptionYear>2012</inceptionYear>
 
-	<!--scm> <connection> scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/ 
-		</connection> <developerConnection> scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/ 
-		</developerConnection> <url>http://incubator.apache.org/stanbol/</url> </scm -->
+	<scm> 
+	  <connection>
+	    scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/dbpedia-spotlight-spot 
+	  </connection>
+	  <developerConnection> 
+	    scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/dbpedia-spotlight-spot
+	  </developerConnection>
+	  <url>http://incubator.apache.org/stanbol/</url>
+	</scm>
 
 	<build>
 		<plugins>
@@ -72,15 +79,22 @@
 	</properties>
 
 	<dependencies>
+        <dependency><!-- to ensure deactivation in offline mode -->
+	        <groupId>org.apache.stanbol</groupId>
+	        <artifactId>org.apache.stanbol.commons.stanboltools.offline</artifactId>
+            <version>0.9.0-incubating</version>
+	        <scope>provided</scope>
+    	</dependency> 
 		<dependency>
 			<groupId>org.apache.stanbol</groupId>
 			<artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+			<version>0.10.0-incubating-SNAPSHOT</version>
 		</dependency>
 
-		<dependency>
+<!-- 		<dependency>
 			<groupId>org.apache.tika</groupId>
 			<artifactId>tika-core</artifactId>
-		</dependency>
+		</dependency>  -->
 
 		<dependency>
 			<groupId>org.apache.felix</groupId>
@@ -99,6 +113,19 @@
 			<artifactId>slf4j-api</artifactId>
 		</dependency>
 
+		<!-- test -->
+        <dependency>
+            <groupId>org.apache.stanbol</groupId>
+            <artifactId>org.apache.stanbol.enhancer.test</artifactId>
+            <version>0.10.0-incubating-SNAPSHOT</version>
+            <scope>test</scope>
+        </dependency>
+		<dependency>
+            <groupId>org.apache.stanbol</groupId>
+            <artifactId>org.apache.stanbol.enhancer.core</artifactId>
+            <version>0.10.0-incubating-SNAPSHOT</version>
+            <scope>test</scope>
+        </dependency>
 		<dependency>
 			<groupId>junit</groupId>
 			<artifactId>junit</artifactId>

Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java?rev=1375468&r1=1375467&r2=1375468&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java Tue Aug 21 10:57:39 2012
@@ -16,30 +16,32 @@
  */
 package org.apache.stanbol.enhancer.engines.dbpspotlight.spot;
 
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.spot.XMLParser.getElementsByTagName;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.spot.XMLParser.loadXMLFromInputStream;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_CONTEXT;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
 
-import java.io.BufferedReader;
+import java.io.BufferedWriter;
 import java.io.ByteArrayOutputStream;
-import java.io.DataOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
 import java.io.UnsupportedEncodingException;
 import java.net.HttpURLConnection;
+import java.net.MalformedURLException;
 import java.net.URL;
 import java.net.URLEncoder;
+import java.nio.charset.Charset;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Dictionary;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.Iterator;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
@@ -48,26 +50,28 @@ import org.apache.clerezza.rdf.core.Lang
 import org.apache.clerezza.rdf.core.Literal;
 import org.apache.clerezza.rdf.core.LiteralFactory;
 import org.apache.clerezza.rdf.core.MGraph;
-import org.apache.clerezza.rdf.core.NonLiteral;
 import org.apache.clerezza.rdf.core.Resource;
-import org.apache.clerezza.rdf.core.Triple;
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
 import org.apache.clerezza.rdf.core.impl.TripleImpl;
 import org.apache.clerezza.rdf.core.serializedform.Serializer;
+import org.apache.commons.io.IOUtils;
 import org.apache.felix.scr.annotations.Component;
 import org.apache.felix.scr.annotations.Properties;
 import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Reference;
 import org.apache.felix.scr.annotations.Service;
+import org.apache.stanbol.commons.stanboltools.offline.OfflineMode;
+import org.apache.stanbol.commons.stanboltools.offline.OnlineMode;
 import org.apache.stanbol.enhancer.servicesapi.Blob;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
-import org.apache.stanbol.enhancer.servicesapi.helper.AbstractEnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
 import org.osgi.service.cm.ConfigurationException;
 import org.osgi.service.component.ComponentContext;
 import org.slf4j.Logger;
@@ -75,6 +79,7 @@ import org.slf4j.LoggerFactory;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
 
 /**
  * {@link DBPSpotlightSpotEnhancementEngine} provides functionality to enhance
@@ -84,11 +89,21 @@ import org.w3c.dom.NodeList;
  */
 @Component(metatype = true, immediate = true, label = "%stanbol.DBPSpotlightSpotEnhancementEngine.name", description = "%stanbol.DBPSpotlightSpotEnhancementEngine.description")
 @Service
-@Properties(value = { @Property(name = EnhancementEngine.PROPERTY_NAME, value = "dbpspotlightspot") })
+@Properties(value = { 
+		@Property(name = EnhancementEngine.PROPERTY_NAME, value = "dbpspotlightspot") 
+})
 public class DBPSpotlightSpotEnhancementEngine extends
 		AbstractEnhancementEngine<IOException, RuntimeException> implements
 		EnhancementEngine, ServiceProperties {
 
+	private static final Charset UTF8 = Charset.forName("UTF-8");
+	
+	/**
+	 * Ensures this engine is deactivated in {@link OfflineMode}
+	 */
+	@Reference
+	OnlineMode onlineMode;
+	
 	/**
 	 * a configurable value of the text segment length to check
 	 */
@@ -100,7 +115,7 @@ public class DBPSpotlightSpotEnhancement
 
 	/**
 	 * The default value for the Execution of this Engine. Currently set to
-	 * {@link ServiceProperties#ORDERING_PRE_PROCESSING}
+	 * <code>{@link ServiceProperties#ORDERING_CONTENT_EXTRACTION} - 29</code>
 	 */
 	public static final Integer defaultOrder = ORDERING_CONTENT_EXTRACTION - 29;
 
@@ -128,11 +143,21 @@ public class DBPSpotlightSpotEnhancement
 			.getLogger(DBPSpotlightSpotEnhancementEngine.class);
 
 	/** holds the url of the Spotlight REST endpoint */
-	private String spotlightUrl;
+	private URL spotlightUrl;
 	/** holds the chosen of spotter to be used */
 	private String spotlightSpotter;
 
 	/**
+	 * Default constructor used by OSGI
+	 */
+	public DBPSpotlightSpotEnhancementEngine(){}
+	
+	protected DBPSpotlightSpotEnhancementEngine(URL spotlightUrl, String spotlightSpotter){
+		this.spotlightUrl = spotlightUrl;
+		this.spotlightSpotter = spotlightSpotter;
+	}
+	
+	/**
 	 * Initialize all parameters from the configuration panel, or with their
 	 * default values
 	 * 
@@ -146,10 +171,24 @@ public class DBPSpotlightSpotEnhancement
 		super.activate(ce);
 
 		Dictionary<String, Object> properties = ce.getProperties();
-		spotlightUrl = properties.get(SL_URL_KEY) == null ? "http://spotlight.dbpedia.org/rest/spot"
-				: (String) properties.get(SL_URL_KEY);
-		spotlightSpotter = properties.get(SL_SPOTTER) == null ? null
-				: (String) properties.get(SL_SPOTTER);
+		Object value = properties.get(SL_URL_KEY);
+		if(value == null || value.toString().isEmpty()){
+			throw new ConfigurationException(SL_URL_KEY, "The URL with the DBpedia "
+					+ "Spotlight Spot RESTful Service MUST NOT be NULL nor empty!");
+		} else {
+			String url = (String) properties.get(SL_URL_KEY);
+			try {
+				this.spotlightUrl = new URL(url);
+			} catch (MalformedURLException e) {
+				throw new ConfigurationException(SL_URL_KEY, "The parsed URL for the "
+						+ "DBpedia Spotlight Spot RESTful Service is illegal formatted!",
+						e);
+			}
+		}
+		//also set the spotter to null if an empty string is parsed
+		value = properties.get(SL_SPOTTER);
+		spotlightSpotter = value != null && !value.toString().isEmpty() ?
+				value.toString() : null;
 	}
 
 	/**
@@ -160,18 +199,22 @@ public class DBPSpotlightSpotEnhancement
 	 */
 	public int canEnhance(ContentItem ci) throws EngineException {
 		if (ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) != null) {
-			String language = getMetadataLanguage(ci.getMetadata(), null);
-			if (language != null && !SUPPORTED_LANGUAGES.contains(language)) {
-				log.info(
-						"DBpedia Spotlight can not process ContentItem {} because "
-								+ "language {} is not supported (supported: {})",
-						new Object[] { ci.getUri(), language,
-								SUPPORTED_LANGUAGES });
+			String language = EnhancementEngineHelper.getLanguage(ci);
+			if (!SUPPORTED_LANGUAGES.contains(language)) {
+				log.info("DBpedia Spotlight can not process ContentItem {} "
+						+ "because language {} is not supported (supported: {})",
+						new Object[] { ci.getUri(), language, SUPPORTED_LANGUAGES });
 				return CANNOT_ENHANCE;
 			}
-			return ENHANCE_SYNCHRONOUS;
+			//rwesten: ASYNC support is highly recommended for engines that
+			//         do call remote services
+			return ENHANCE_ASYNC;
+		} else {
+			log.info("DBpedia Spotlight can not process ContentItem {} "
+					+ "because it does not have 'plain/text' content",
+					ci.getUri());
+			return CANNOT_ENHANCE;
 		}
-		return CANNOT_ENHANCE;
 	}
 
 	/**
@@ -182,17 +225,28 @@ public class DBPSpotlightSpotEnhancement
 	 *            the {@link ContentItem}
 	 */
 	public void computeEnhancements(ContentItem ci) throws EngineException {
+		Language language;
+		String lang = EnhancementEngineHelper.getLanguage(ci);
+		if(!SUPPORTED_LANGUAGES.contains(lang)){
+			throw new IllegalStateException("Langage '"+lang
+					+ "' as annotated for ContentItem "
+				    + ci.getUri() + " is not supported by this Engine: "
+				    + "This is also checked in the canEnhance method! -> This "
+					+ "indicated an Bug in the implementation of the "
+					+ "EnhancementJobManager!");
+		} else {
+			language = lang == null || lang.isEmpty() ? null : new Language(lang);
+		}
 		Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(ci,
 				SUPPORTED_MIMTYPES);
 		if (contentPart == null) {
-			throw new IllegalStateException(
-					"No ContentPart with Mimetype '"
-							+ TEXT_PLAIN_MIMETYPE
-							+ "' found for ContentItem "
-							+ ci.getUri()
-							+ ": This is also checked in the canEnhance method! -> This "
-							+ "indicated an Bug in the implementation of the "
-							+ "EnhancementJobManager!");
+			throw new IllegalStateException("No ContentPart with Mimetype '"
+					+ TEXT_PLAIN_MIMETYPE
+					+ "' found for ContentItem "
+					+ ci.getUri()
+					+ ": This is also checked in the canEnhance method! -> This "
+					+ "indicated an Bug in the implementation of the "
+					+ "EnhancementJobManager!");
 		}
 		String text = "";
 		try {
@@ -201,13 +255,13 @@ public class DBPSpotlightSpotEnhancement
 			throw new InvalidContentException(this, ci, e);
 		}
 
-		Collection<SurfaceForm> dbpslGraph = doPostRequest(text);
+		Collection<SurfaceForm> dbpslGraph = doPostRequest(text,ci.getUri());
 		if (dbpslGraph != null) {
 			// Acquire a write lock on the ContentItem when adding the
 			// enhancements
 			ci.getLock().writeLock().lock();
 			try {
-				createEnhancements(dbpslGraph, ci);
+				createEnhancements(dbpslGraph, ci,text,language);
 				if (log.isDebugEnabled()) {
 					Serializer serializer = Serializer.getInstance();
 					ByteArrayOutputStream debugStream = new ByteArrayOutputStream();
@@ -235,28 +289,18 @@ public class DBPSpotlightSpotEnhancement
 	 * @param ci
 	 *            the content item
 	 */
-	public void createEnhancements(Collection<SurfaceForm> occs,
-			ContentItem ci) {
+	protected void createEnhancements(Collection<SurfaceForm> occs,
+			ContentItem ci,  String content, Language lang) {
 		LiteralFactory literalFactory = LiteralFactory.getInstance();
-		final Language language; // used for plain literals representing parts
-									// fo the content
-		String langString = getMetadataLanguage(ci.getMetadata(), null);
-
-		if (langString != null && !langString.isEmpty()) {
-			language = new Language(langString);
-		} else {
-			language = null;
-		}
 
 		HashMap<String, UriRef> entityAnnotationMap = new HashMap<String, UriRef>();
 
+		MGraph model = ci.getMetadata();
 		for (SurfaceForm occ : occs) {
 			UriRef textAnnotation = EnhancementEngineHelper
 					.createTextEnhancement(ci, this);
-			MGraph model = ci.getMetadata();
-
 			model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,
-					new PlainLiteralImpl(occ.name, language)));
+					new PlainLiteralImpl(occ.name, lang)));
 			model.add(new TripleImpl(textAnnotation, ENHANCER_START,
 					literalFactory.createTypedLiteral(occ.offset)));
 			model.add(new TripleImpl(textAnnotation, ENHANCER_END,
@@ -264,10 +308,10 @@ public class DBPSpotlightSpotEnhancement
 							+ occ.name.length())));
 			model.add(new TripleImpl(textAnnotation, DC_TYPE, new UriRef(
 					occ.type)));
-			// TODO ################## model.add(new TripleImpl(textAnnotation,
-			// ENHANCER_SELECTION_CONTEXT, new
-			// PlainLiteralImpl(occ.context,language)));
-
+			model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, 
+					new PlainLiteralImpl(
+							getSelectionContext(content, occ.name, occ.offset),
+							lang)));
 			if (entityAnnotationMap.containsKey(occ.name)) {
 				model.add(new TripleImpl(entityAnnotationMap.get(occ.name),
 						DC_RELATION, textAnnotation));
@@ -282,32 +326,21 @@ public class DBPSpotlightSpotEnhancement
 	 * 
 	 * @param text
 	 *            a <code>String</code> with the text to be analyzed
+	 * @param contentItemUri
+	 *            the URI of the ContentItem (only used for logging)
 	 * @return a <code>String</code> with the server response
 	 * @throws EngineException
 	 *             if the request cannot be sent
 	 */
-	public Collection<SurfaceForm> doPostRequest(String text)
+	protected Collection<SurfaceForm> doPostRequest(String text,UriRef contentItemUri)
 			throws EngineException {
-		StringBuilder data = new StringBuilder();
-		try {
-			if (spotlightSpotter != null && !spotlightSpotter.isEmpty())
-				data.append(URLEncoder.encode("spotter", "UTF-8") + "="
-						+ URLEncoder.encode(spotlightSpotter, "UTF-8") + "&");
-			data.append(URLEncoder.encode("text", "UTF-8") + "="
-					+ URLEncoder.encode(text, "UTF-8"));
-		} catch (UnsupportedEncodingException e) {
-			throw new EngineException(
-					"Data for the httprequest could not be converted. Error: "
-							+ e.getMessage());
-		}
-
+		//rwesten: reimplemented this so that the request
+		//         is directly written to the request instead
+		//         of storing the data in an in-memory StringBuilder
 		HttpURLConnection connection = null;
-		StringBuffer response = new StringBuffer();
-
+		BufferedWriter wr = null;
 		try {
-			// Create connection
-			URL url = new URL(spotlightUrl);
-			connection = (HttpURLConnection) url.openConnection();
+			connection = (HttpURLConnection) spotlightUrl.openConnection();
 			connection.setRequestMethod("POST");
 			connection.setRequestProperty("Content-Type",
 					"application/x-www-form-urlencoded");
@@ -318,51 +351,63 @@ public class DBPSpotlightSpotEnhancement
 			connection.setDoOutput(true);
 
 			// Send request
-			DataOutputStream wr = new DataOutputStream(
-					connection.getOutputStream());
-			wr.writeBytes(data.toString());
-			wr.flush();
-			wr.close();
-
-			// Get Response
-			InputStream is = connection.getInputStream();
-			BufferedReader rd = new BufferedReader(new InputStreamReader(is));
-			String line;
-			while ((line = rd.readLine()) != null) {
-				response.append(line);
-				response.append('\r');
+			wr = new BufferedWriter(new OutputStreamWriter(
+					connection.getOutputStream(),UTF8));
+		} catch (IOException e) {
+			IOUtils.closeQuietly(wr);
+			throw new EngineException("Unable to open connection to "+
+					spotlightUrl,e);
+		}
+		try {
+			if (spotlightSpotter != null && !spotlightSpotter.isEmpty()) {
+				wr.write("spotter=");
+				wr.write(URLEncoder.encode(spotlightSpotter, UTF8.name()));
+				wr.write('&');
 			}
-			rd.close();
-
-		} catch (Exception e) {
-
-			log.error("[request] Request could not be made. Error: "
-					+ e.getMessage());
-			e.printStackTrace();
-			return null;
-
+			wr.write("text=");
+			//now append the URL encoded text
+			//TODO: This will load the URLEncoded variant in-memory.
+			//      One could avoid that by encoding the data in smaller
+			//      pieces, but using URLEncoding for big data is anyway
+			//      very inefficient. So instead of fixing this issue here
+			//      DBpedia Spotlight should support "multipart/form-data"
+			//      instead.
+			//      As soon as this is supported this should be re-implemented
+			//      to support streaming.
+			wr.write(URLEncoder.encode(text, UTF8.name()));
+		} catch (UnsupportedEncodingException e) {
+			throw new IllegalStateException(
+					"The platform does not support encoding " + UTF8.name(),e);
+		} catch (IOException e) {
+			throw new EngineException("Unable to write 'plain/text' content "
+					+ "for ContentItem "+contentItemUri+" to "
+					+ spotlightUrl,e);
 		} finally {
-
-			if (connection != null) {
-				connection.disconnect();
-			}
+			IOUtils.closeQuietly(wr);
 		}
-
-		XMLParser xmlParser = new XMLParser();
+		// rwesten: reimplemented this to read the XML
+		// Document directly from the response
+		InputStream is = null;
+		Document xmlDoc;
 		try {
-			Document xmlDoc = xmlParser.loadXMLFromString(response.toString());
-			NodeList nlist = xmlParser.getElementsByTagName(xmlDoc,
-					"surfaceForm");
-			Collection<SurfaceForm> annos = this.getAnnotations(nlist);
-
-			return annos;
-		} catch (Exception e) {
-			log.error("[response] Response XML could not be parsed. Error: "
-					+ e.getMessage());
-			throw new EngineException(
-					"Response XML could not be parsed. Error: "
-							+ e.getMessage());
+			// Get Response
+			 is = connection.getInputStream();
+			xmlDoc = loadXMLFromInputStream(is);
+		} catch (IOException e) {
+			throw new EngineException("Unable to spot Entities with"
+					+ "Dbpedia Spotlight Spot RESTful Serice running at "
+					+ spotlightUrl,e);
+		} catch(SAXException e) {
+			throw new EngineException("Unable to parse Response from "
+					+ "Dbpedia Spotlight Spot RESTful Serice running at "
+					+ spotlightUrl,e);
+		} finally {
+			IOUtils.closeQuietly(is);
 		}
+		//rwesten: commented out the disconnect to allow keep-alive
+		//connection.disconnect();
+		NodeList nlist = getElementsByTagName(xmlDoc,"surfaceForm");
+	    return getAnnotations(nlist);
 	}
 
 	/**
@@ -396,15 +441,15 @@ public class DBPSpotlightSpotEnhancement
 		return Collections.unmodifiableMap(Collections.singletonMap(
 				ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder));
 	}
-
-	public String getMetadataLanguage(MGraph model, NonLiteral subj) {
-		Iterator<Triple> it = model.filter(subj, DC_LANGUAGE, null);
-		if (it.hasNext()) {
-			Resource langNode = it.next().getObject();
-			return getLexicalForm(langNode);
-		}
-		return null;
-	}
+// rwesten: Use the Utility provided by the EnhancementEngineHelper instead
+//	public String getMetadataLanguage(MGraph model, NonLiteral subj) {
+//		Iterator<Triple> it = model.filter(subj, DC_LANGUAGE, null);
+//		if (it.hasNext()) {
+//			Resource langNode = it.next().getObject();
+//			return getLexicalForm(langNode);
+//		}
+//		return null;
+//	}
 
 	public String getLexicalForm(Resource res) {
 		if (res == null) {
@@ -415,15 +460,38 @@ public class DBPSpotlightSpotEnhancement
 			return res.toString();
 		}
 	}
-
-	/**
-	 * This method is used by the test class to set the endpoint url
-	 * 
-	 * @param url
-	 *            String the url of the Spotlight endpoint
-	 */
-	public void setEndpointUrl(String url) {
-		spotlightUrl = url;
-	}
-
+	
+    private static final int DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE = 50;
+    /**
+     * Extracts the selection context based on the content, selection and
+     * the start char offset of the selection
+     * @param content the content
+     * @param selection the selected text
+     * @param selectionStartPos the start char position of the selection
+     * @return the context
+     */
+    public static String getSelectionContext(String content, String selection,int selectionStartPos){
+        //extract the selection context
+        int beginPos;
+        if(selectionStartPos <= DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE){
+            beginPos = 0;
+        } else {
+            int start = selectionStartPos-DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
+            beginPos = content.indexOf(' ',start);
+            if(beginPos < 0 || beginPos >= selectionStartPos){ //no words
+                beginPos = start; //begin within a word
+            }
+        }
+        int endPos;
+        if(selectionStartPos+selection.length()+DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE >= content.length()){
+            endPos = content.length();
+        } else {
+            int start = selectionStartPos+selection.length()+DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
+            endPos = content.lastIndexOf(' ', start);
+            if(endPos <= selectionStartPos+selection.length()){
+                endPos = start; //end within a word;
+            }
+        }
+        return content.substring(beginPos, endPos);
+    }
 }

Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/XMLParser.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/XMLParser.java?rev=1375468&r1=1375467&r2=1375468&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/XMLParser.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/XMLParser.java Tue Aug 21 10:57:39 2012
@@ -34,15 +34,19 @@ import org.xml.sax.SAXException;
  * 
  * @author <a href="mailto:iavor.jelev@babelmonkeys.com">Iavor Jelev</a>
  */
+public final class XMLParser {
 
-public class XMLParser {
-
-	public NodeList getElementsByTagName(Document doc, String tagName) {
+	/**
+	 * Do not create instances of Utility Classes
+	 */
+	private XMLParser(){};
+	
+	public static NodeList getElementsByTagName(Document doc, String tagName) {
 
 		return doc.getElementsByTagName(tagName);
 	}
 
-	public Document loadXMLFromString(String xml) throws SAXException,
+	public static Document loadXMLFromString(String xml) throws SAXException,
 			IOException {
 		Document doc = loadXMLFromInputStream(new ByteArrayInputStream(
 				xml.getBytes()));
@@ -51,7 +55,7 @@ public class XMLParser {
 		return doc;
 	}
 
-	public Document loadXMLFromInputStream(InputStream is) throws SAXException,
+	public static Document loadXMLFromInputStream(InputStream is) throws SAXException,
 			IOException {
 		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
 		factory.setNamespaceAware(true);
@@ -67,7 +71,7 @@ public class XMLParser {
 		return doc;
 	}
 
-	public Document loadXMLFromFile(String filePath)
+	public static Document loadXMLFromFile(String filePath)
 			throws ParserConfigurationException, SAXException, IOException {
 		File fXmlFile = new File(filePath);
 		DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();

Copied: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementTest.java (from r1375110, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/core/DBPSpotlightSpotEnhancementTest.java)
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementTest.java?p2=incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementTest.java&p1=incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/core/DBPSpotlightSpotEnhancementTest.java&r1=1375110&r2=1375468&rev=1375468&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/core/DBPSpotlightSpotEnhancementTest.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementTest.java Tue Aug 21 10:57:39 2012
@@ -14,13 +14,38 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.stanbol.enhancer.engines.dbpspotlight.spot.core;
+package org.apache.stanbol.enhancer.engines.dbpspotlight.spot;
 
-import java.util.Collection;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.spot.DBPSpotlightSpotEnhancementEngine.SL_URL_KEY;
+import static org.apache.stanbol.enhancer.servicesapi.EnhancementEngine.ENHANCE_ASYNC;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
 
+import java.net.URL;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map.Entry;
+
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
 import org.apache.stanbol.enhancer.engines.dbpspotlight.spot.DBPSpotlightSpotEnhancementEngine;
 import org.apache.stanbol.enhancer.engines.dbpspotlight.spot.SurfaceForm;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper;
 import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -42,30 +67,58 @@ public class DBPSpotlightSpotEnhancement
 	private static final Logger LOG = LoggerFactory
 			.getLogger(DBPSpotlightSpotEnhancementTest.class);
 	private static String SPL_URL = System
-			.getProperty(DBPSpotlightSpotEnhancementEngine.SL_URL_KEY) == null ? "http://spotlight.dbpedia.org/rest/spot"
-			: (String) System
-					.getProperty(DBPSpotlightSpotEnhancementEngine.SL_URL_KEY);
+			.getProperty(SL_URL_KEY) == null ? 
+					"http://spotlight.dbpedia.org/rest/spot" : 
+						(String) System.getProperty(SL_URL_KEY);
 	private static String TEST_TEXT = "President Obama is meeting Angela Merkel in Berlin on Monday";
 	private static DBPSpotlightSpotEnhancementEngine dbpslight;
 
+	private static ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();
+	
+	private static ContentItem ci;
+	private static Entry<UriRef, Blob> textContentPart;
+	
 	@BeforeClass
-	public static void oneTimeSetup() throws ConfigurationException {
-		dbpslight = new DBPSpotlightSpotEnhancementEngine();
-		dbpslight.setEndpointUrl(SPL_URL);
+	public static void oneTimeSetup() throws Exception {
+		//create the contentItem for testing
+		ci = ciFactory.createContentItem(new StringSource(TEST_TEXT));
+		assertNotNull(ci);
+		textContentPart = ContentItemHelper.getBlob(ci, Collections.singleton("text/plain"));
+		assertNotNull(textContentPart);
+		//add the language of the text
+		ci.getMetadata().add(new TripleImpl(ci.getUri(), Properties.DC_LANGUAGE, 
+				new PlainLiteralImpl("en")));
+		assertEquals("en", EnhancementEngineHelper.getLanguage(ci));
+		//and the enhancement engine instance
+		dbpslight = new DBPSpotlightSpotEnhancementEngine(new URL(SPL_URL),null);
 	}
 
 	@Test
-	public void testEntityExtraction() {
+	public void testEntityExtraction() throws Exception {
 		Collection<SurfaceForm> entities;
-		try {
-			entities = dbpslight.doPostRequest(TEST_TEXT);
-			LOG.info("Found entities: {}", entities.size());
-			LOG.debug("Entities:\n{}", entities);
-			Assert.assertFalse("No entities were found!", entities.isEmpty());
-		} catch (EngineException e) {
-			Assert.assertFalse("An EngineException occurred! The message was: "
-					+ e.getMessage(), true);
-		}
+		entities = dbpslight.doPostRequest(TEST_TEXT,ci.getUri());
+		LOG.info("Found entities: {}", entities.size());
+		LOG.debug("Entities:\n{}", entities);
+		assertFalse("No entities were found!", entities.isEmpty());
 	}
 
+	@Test
+	public void testCanEnhance() throws EngineException {
+		assertEquals(ENHANCE_ASYNC, dbpslight.canEnhance(ci));
+	}
+	
+	/**
+	 * Validates the Enhancements created by this engine
+	 * @throws EngineException
+	 */
+	@Test
+	public void testEnhancement() throws EngineException {
+		dbpslight.computeEnhancements(ci);
+        HashMap<UriRef,Resource> expectedValues = new HashMap<UriRef,Resource>();
+        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
+        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(
+        		dbpslight.getClass().getName()));
+		EnhancementStructureHelper.validateAllTextAnnotations(
+				ci.getMetadata(), TEST_TEXT, expectedValues);
+	}
 }