You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/08/24 15:48:54 UTC

svn commit: r1376912 [1/2] - in /incubator/stanbol/branches/dbpedia-spotlight-engines: ./ bundlelist/src/main/bundles/ engines/ engines/dbpedia-spotlight-annotate/ engines/dbpedia-spotlight-candidates/ engines/dbpedia-spotlight-disambiguate/ engines/db...

Author: rwesten
Date: Fri Aug 24 13:48:52 2012
New Revision: 1376912

URL: http://svn.apache.org/viewvc?rev=1376912&view=rev
Log:
STANBOL-706: Moved all DBpedia Spotlight Engines to a single module. Moved shared functionality to a utility class. Moved shared constants to a common interface. Also applied changes to the Disambiguation engine similar to those made for the other engines.

Added:
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/
      - copied from r1376397, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/pom.xml
      - copied, changed from r1376420, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/pom.xml
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/Constants.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java
      - copied, changed from r1376420, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/
      - copied from r1376397, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java
      - copied, changed from r1376420, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/
      - copied from r1376397, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/Annotation.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/CandidateResource.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/SurfaceForm.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/
      - copied from r1376397, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java
      - copied, changed from r1376420, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/utils/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/utils/SpotlightEngineUtils.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/utils/XMLParser.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/resources/config/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-dbpspotlight.config
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementTest.java
      - copied, changed from r1376420, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementTest.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/
      - copied from r1376397, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementTest.java
      - copied, changed from r1376420, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementTest.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementTest.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/
      - copied from r1376397, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementTest.java
      - copied, changed from r1376420, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementTest.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/resources/README
      - copied unchanged from r1376397, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/test/resources/README
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/resources/spots.xml
      - copied unchanged from r1376397, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/test/resources/spots.xml
Removed:
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/Annotation.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/XMLParser.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/CandidateResource.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/SurfaceForm.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/XMLParser.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/Annotation.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/XMLParser.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/SurfaceForm.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/XMLParser.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/core/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/core/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/core/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/
Modified:
    incubator/stanbol/branches/dbpedia-spotlight-engines/   (props changed)
    incubator/stanbol/branches/dbpedia-spotlight-engines/bundlelist/src/main/bundles/list.xml
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementEngine.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/resources/OSGI-INF/metatype/metatype.properties
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/pom.xml
    incubator/stanbol/branches/dbpedia-spotlight-engines/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java

Propchange: incubator/stanbol/branches/dbpedia-spotlight-engines/
------------------------------------------------------------------------------
    svn:mergeinfo = /incubator/stanbol/trunk/enhancer:1376046,1376385

Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/bundlelist/src/main/bundles/list.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/bundlelist/src/main/bundles/list.xml?rev=1376912&r1=1376911&r2=1376912&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/bundlelist/src/main/bundles/list.xml (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/bundlelist/src/main/bundles/list.xml Fri Aug 24 13:48:52 2012
@@ -201,20 +201,9 @@
      <!-- DBpedia Spotlight Engines (STANBOL-706) -->
     <bundle>
       <groupId>org.apache.stanbol</groupId>
-      <artifactId>org.apache.stanbol.enhancer.engines.dbpspotlight.spot</artifactId>
+      <artifactId>org.apache.stanbol.enhancer.engines.dbpspotlight</artifactId>
       <version>0.10.0-incubating-SNAPSHOT</version>
     </bundle>
-    <bundle>
-      <groupId>org.apache.stanbol</groupId>
-      <artifactId>org.apache.stanbol.enhancer.engines.dbpspotlight.candidates</artifactId>
-      <version>0.10.0-incubating-SNAPSHOT</version>
-    </bundle>
-    <bundle>
-      <groupId>org.apache.stanbol</groupId>
-      <artifactId>org.apache.stanbol.enhancer.engines.dbpspotlight.annotate</artifactId>
-      <version>0.10.0-incubating-SNAPSHOT</version>
-    </bundle>
-    
   </startLevel>
   
   <!-- Default Configuration for the Stanbol Enhancer -->

Copied: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/pom.xml (from r1376420, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/pom.xml)
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/pom.xml?p2=incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/pom.xml&p1=incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/pom.xml&r1=1376420&r2=1376912&rev=1376912&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/pom.xml (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/pom.xml Fri Aug 24 13:48:52 2012
@@ -22,12 +22,22 @@
 	</parent>
 
 	<groupId>org.apache.stanbol</groupId>
-	<artifactId>org.apache.stanbol.enhancer.engines.dbpspotlight.annotate</artifactId>
+	<artifactId>org.apache.stanbol.enhancer.engines.dbpspotlight</artifactId>
     <version>0.10.0-incubating-SNAPSHOT</version>
 	<packaging>bundle</packaging>
 
-	<name>Apache Stanbol Enhancer Enhancement Engine : DBPedia Spotlight Annotate</name>
-	<description></description>
+	<name>Apache Stanbol Enhancer Enhancement Engine : DBPedia Spotlight</name>
+	<description>
+        This module provides four Enhancement Engines for the
+        DBpedia Spotlight RESTful services. This includes the Annotate Engine
+        - supporting the whole processing workflow as well as a Spotting,
+        Candidate and Disambiguation Engine that can be used by Users that want
+        to use only part of DBpedia Spotlight's functionality within their
+        own Enhancement Engines.
+        Users that do not want to send their Content to the public Spotlight
+        server can also install a local Spotlight server and change the
+        Configuration of the Engines accordingly.
+    </description>
 
 	<inceptionYear>2012</inceptionYear>
 
@@ -54,6 +64,8 @@
 						</Export-Package>
 						<Embed-Dependency>
 						</Embed-Dependency>
+						<!-- configure a dbpedia chain  -->
+				        <Install-Path>config</Install-Path>
 					</instructions>
 				</configuration>
 			</plugin>

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/Constants.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/Constants.java?rev=1376912&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/Constants.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/Constants.java Fri Aug 24 13:48:52 2012
@@ -0,0 +1,73 @@
+package org.apache.stanbol.enhancer.engines.dbpspotlight;
+
+import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.felix.scr.annotations.Property;
+
+/**
+ * Defines Properties used for the configuration of the different Engines
+ */
+public interface Constants {
+	
+	String PARAM_URL_KEY = "dbpedia.spotlight.url";
+
+	String PARAM_SPOTTER = "dbpedia.spotlight.spotter";
+
+	String PARAM_DISAMBIGUATOR = "dbpedia.spotlight.disambiguator";
+
+	String PARAM_RESTRICTION = "dbpedia.spotlight.types";
+
+	String PARAM_SPARQL = "dbpedia.spotlight.sparql";
+
+	String PARAM_SUPPORT = "dbpedia.spotlight.support";
+
+	String PARAM_CONFIDENCE = "dbpedia.spotlight.confidence";
+
+	
+	/**
+	 * The namespace used by DBpedia Spotlight specific properties 
+	 */
+	String SPOTLIGHT_NAME_SPACE = "http://spotlight.dbpedia.org/ns/";
+	
+	/*
+	 * Definition of some Spotlight specific properties added to
+	 * fise:EntityAnnotations created by this Engine
+	 */
+	UriRef PROPERTY_CONTEXTUAL_SCORE = new UriRef(
+			SPOTLIGHT_NAME_SPACE + "contextualScore");
+	UriRef PROPERTY_PERCENTAGE_OF_SECOND_RANK = new UriRef(
+			SPOTLIGHT_NAME_SPACE + "percentageOfSecondRank");
+	UriRef PROPERTY_SUPPORT = new UriRef(
+			SPOTLIGHT_NAME_SPACE + "support");
+	UriRef PROPERTY_PRIOR_SCORE = new UriRef(
+			SPOTLIGHT_NAME_SPACE + "priorScore");
+	UriRef PROPERTY_FINAL_SCORE = new UriRef(
+			SPOTLIGHT_NAME_SPACE + "finalScore");
+	UriRef PROPERTY_SIMILARITY_SCORE = new UriRef(
+			SPOTLIGHT_NAME_SPACE + "similarityScore");
+	
+	Charset UTF8 = Charset.forName("UTF-8");
+	/**
+	 * This contains the only MIME type directly supported by this enhancement
+	 * engine.
+	 */
+	String TEXT_PLAIN_MIMETYPE = "text/plain";
+	/**
+	 * This contains a list of languages supported by DBpedia Spotlight. If the
+	 * metadata doesn't contain a value for the language as the value of the
+	 * {@link org.apache.stanbol.enhancer.servicesapi.rdf.Properties#DC_LANGUAGE dc:language} property the content can't be processed.
+	 */
+	Set<String> SUPPORTED_LANGUAGES = Collections
+			.unmodifiableSet(new HashSet<String>(Arrays.asList("en")));
+
+	
+	/** Set containing the only supported mime type {@link #TEXT_PLAIN_MIMETYPE} */
+	Set<String> SUPPORTED_MIMTYPES = Collections
+			.singleton(TEXT_PLAIN_MIMETYPE);
+
+}

Copied: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java (from r1376420, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java)
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java?p2=incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java&p1=incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java&r1=1376420&r2=1376912&rev=1376912&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementEngine.java Fri Aug 24 13:48:52 2012
@@ -16,18 +16,15 @@
  */
 package org.apache.stanbol.enhancer.engines.dbpspotlight.annotate;
 
-import static org.apache.stanbol.enhancer.engines.dbpspotlight.annotate.XMLParser.getElementsByTagName;
-import static org.apache.stanbol.enhancer.engines.dbpspotlight.annotate.XMLParser.loadXMLFromInputStream;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_LABEL;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_TYPE;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_CONTEXT;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_CONFIDENCE;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_DISAMBIGUATOR;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_RESTRICTION;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_SPARQL;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_SPOTTER;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_SUPPORT;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_URL_KEY;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.UTF8;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.utils.XMLParser.loadXMLFromInputStream;
 
 import java.io.BufferedWriter;
 import java.io.ByteArrayOutputStream;
@@ -36,32 +33,15 @@ import java.io.InputStream;
 import java.io.OutputStreamWriter;
 import java.io.UnsupportedEncodingException;
 import java.net.HttpURLConnection;
-import java.net.MalformedURLException;
 import java.net.URL;
 import java.net.URLEncoder;
-import java.nio.charset.Charset;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Dictionary;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
 import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
 
 import org.apache.clerezza.rdf.core.Language;
-import org.apache.clerezza.rdf.core.Literal;
-import org.apache.clerezza.rdf.core.LiteralFactory;
-import org.apache.clerezza.rdf.core.MGraph;
-import org.apache.clerezza.rdf.core.NonLiteral;
-import org.apache.clerezza.rdf.core.Resource;
-import org.apache.clerezza.rdf.core.Triple;
 import org.apache.clerezza.rdf.core.UriRef;
-import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
-import org.apache.clerezza.rdf.core.impl.TripleImpl;
 import org.apache.clerezza.rdf.core.serializedform.Serializer;
 import org.apache.commons.io.IOUtils;
 import org.apache.felix.scr.annotations.Component;
@@ -71,22 +51,18 @@ import org.apache.felix.scr.annotations.
 import org.apache.felix.scr.annotations.Service;
 import org.apache.stanbol.commons.stanboltools.offline.OfflineMode;
 import org.apache.stanbol.commons.stanboltools.offline.OnlineMode;
-import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.model.Annotation;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.utils.SpotlightEngineUtils;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
-import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
 import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
-import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
-import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.osgi.service.cm.ConfigurationException;
 import org.osgi.service.component.ComponentContext;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.NodeList;
 import org.xml.sax.SAXException;
 
 /**
@@ -95,39 +71,24 @@ import org.xml.sax.SAXException;
  * 
  * @author Iavor Jelev, Babelmonkeys (GzEvD)
  */
-@Component(metatype = true, immediate = true, label = "%stanbol.DBPSpotlightAnnotateEnhancementEngine.name", description = "%stanbol.DBPSpotlightAnnotateEnhancementEngine.description")
+@Component(metatype = true, immediate = true, 
+    label = "%stanbol.DBPSpotlightAnnotateEnhancementEngine.name", 
+    description = "%stanbol.DBPSpotlightAnnotateEnhancementEngine.description")
 @Service
-@Properties(value = { @Property(name = EnhancementEngine.PROPERTY_NAME, value = "dbpspotlightannotate") })
+@Properties(value = { 
+		@Property(name = EnhancementEngine.PROPERTY_NAME, value = "dbpspotlightannotate"),
+		@Property(name = PARAM_URL_KEY, value = "http://spotlight.dbpedia.org/rest/annotate"),
+		@Property(name = PARAM_SPOTTER),
+		@Property(name = PARAM_DISAMBIGUATOR),
+		@Property(name = PARAM_RESTRICTION),
+		@Property(name = PARAM_SPARQL),
+		@Property(name = PARAM_SUPPORT),
+		@Property(name = PARAM_CONFIDENCE)
+})
 public class DBPSpotlightAnnotateEnhancementEngine extends
 		AbstractEnhancementEngine<IOException, RuntimeException> implements
 		EnhancementEngine, ServiceProperties {
 
-	private static final Charset UTF8 = Charset.forName("UTF-8");
-
-	/**
-	 * a configurable value of the text segment length to check
-	 */
-	@Property(value = "http://spotlight.dbpedia.org/rest/annotate")
-	public static final String SL_URL_KEY = "stanbol.DBPSpotlightAnnotateEnhancementEngine.url";
-
-	@Property(value = "NESpotter")
-	public static final String SL_SPOTTER = "stanbol.DBPSpotlightAnnotateEnhancementEngine.spotter";
-
-	@Property(value = "")
-	public static final String SL_DISAMBIGUATOR = "stanbol.DBPSpotlightAnnotateEnhancementEngine.disambiguator";
-
-	@Property()
-	public static final String SL_RESTRICTION = "stanbol.DBPSpotlightAnnotateEnhancementEngine.types";
-
-	@Property()
-	public static final String SL_SPARQL = "stanbol.DBPSpotlightAnnotateEnhancementEngine.sparql";
-
-	@Property()
-	public static final String SL_SUPPORT = "stanbol.DBPSpotlightAnnotateEnhancementEngine.support";
-
-	@Property()
-	public static final String SL_CONFIDENCE = "stanbol.DBPSpotlightAnnotateEnhancementEngine.confidence";
-
 	/**
 	 * Ensures this engine is deactivated in {@link OfflineMode}
 	 */
@@ -140,22 +101,6 @@ public class DBPSpotlightAnnotateEnhance
 	 */
 	public static final Integer defaultOrder = ORDERING_CONTENT_EXTRACTION - 27;
 
-	/**
-	 * This contains the only MIME type directly supported by this enhancement
-	 * engine.
-	 */
-	private static final String TEXT_PLAIN_MIMETYPE = "text/plain";
-	/** Set containing the only supported mime type {@link #TEXT_PLAIN_MIMETYPE} */
-	private static final Set<String> SUPPORTED_MIMTYPES = Collections
-			.singleton(TEXT_PLAIN_MIMETYPE);
-	/**
-	 * This contains a list of languages supported by DBpedia Spotlight. If the
-	 * metadata doesn't contain a value for the language as the value of the
-	 * {@link Property.DC_LANG property} the content can't be processed.
-	 */
-	protected static final Set<String> SUPPORTED_LANGUAGES = Collections
-			.unmodifiableSet(new HashSet<String>(Arrays.asList("en")));
-
 	/** holds the logger. */
 	private static final Logger log = LoggerFactory
 			.getLogger(DBPSpotlightAnnotateEnhancementEngine.class);
@@ -203,34 +148,23 @@ public class DBPSpotlightAnnotateEnhance
 		super.activate(ce);
 
 		Dictionary<String, Object> properties = ce.getProperties();
-		Object value = properties.get(SL_URL_KEY);
-		if(value == null || value.toString().isEmpty()){
-			throw new ConfigurationException(SL_URL_KEY, "The URL with the DBpedia "
-					+ "Spotlight Annotate RESTful Service MUST NOT be NULL nor empty!");
-		} else {
-			String url = (String) properties.get(SL_URL_KEY);
-			try {
-				this.spotlightUrl = new URL(url);
-			} catch (MalformedURLException e) {
-				throw new ConfigurationException(SL_URL_KEY, "The parsed URL for the "
-						+ "DBpedia Spotlight Annotate RESTful Service is illegal formatted!",
-						e);
-			}
-		}
-		spotlightSpotter = properties.get(SL_SPOTTER) == null ? null
-				: (String) properties.get(SL_SPOTTER);
-		spotlightDisambiguator = properties.get(SL_DISAMBIGUATOR) == null ? null
-				: (String) properties.get(SL_DISAMBIGUATOR);
-		spotlightTypesRestriction = properties.get(SL_RESTRICTION) == null ? null
-				: (String) properties.get(SL_RESTRICTION);
-		spotlightSparql = properties.get(SL_SPARQL) == null ? null
-				: (String) properties.get(SL_SPARQL);
-		spotlightSupport = properties.get(SL_SUPPORT) == null ? null
-				: (String) properties.get(SL_SUPPORT);
-		spotlightConfidence = properties.get(SL_CONFIDENCE) == null ? null
-				: (String) properties.get(SL_CONFIDENCE);
+		spotlightUrl = SpotlightEngineUtils.parseSpotlightServiceURL(properties);
+		spotlightSpotter = properties.get(PARAM_SPOTTER) == null ? null
+				: (String) properties.get(PARAM_SPOTTER);
+		spotlightDisambiguator = properties.get(PARAM_DISAMBIGUATOR) == null ? null
+				: (String) properties.get(PARAM_DISAMBIGUATOR);
+		spotlightTypesRestriction = properties.get(PARAM_RESTRICTION) == null ? null
+				: (String) properties.get(PARAM_RESTRICTION);
+		spotlightSparql = properties.get(PARAM_SPARQL) == null ? null
+				: (String) properties.get(PARAM_SPARQL);
+		spotlightSupport = properties.get(PARAM_SUPPORT) == null ? null
+				: (String) properties.get(PARAM_SUPPORT);
+		spotlightConfidence = properties.get(PARAM_CONFIDENCE) == null ? null
+				: (String) properties.get(PARAM_CONFIDENCE);
 	}
 
+
+
 	/**
 	 * Check if the content can be enhanced
 	 * 
@@ -238,18 +172,8 @@ public class DBPSpotlightAnnotateEnhance
 	 *            the {@link ContentItem}
 	 */
 	public int canEnhance(ContentItem ci) throws EngineException {
-		if (ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) != null) {
-			String language = EnhancementEngineHelper.getLanguage(ci);
-			if (!SUPPORTED_LANGUAGES.contains(language)) {
-				log.info("DBpedia Spotlight can not process ContentItem {} "
-						+ "because language {} is not supported (supported: {})",
-						new Object[] { ci.getUri(), language, SUPPORTED_LANGUAGES });
-				return CANNOT_ENHANCE;
-			}
-			return ENHANCE_ASYNC;
-		} else {
-			return CANNOT_ENHANCE;
-		}
+		return SpotlightEngineUtils.canProcess(ci) ?
+			ENHANCE_ASYNC : CANNOT_ENHANCE;
 	}
 
 	/**
@@ -260,36 +184,8 @@ public class DBPSpotlightAnnotateEnhance
 	 *            the {@link ContentItem}
 	 */
 	public void computeEnhancements(ContentItem ci) throws EngineException {
-		Language language;
-		String lang = EnhancementEngineHelper.getLanguage(ci);
-		if(!SUPPORTED_LANGUAGES.contains(lang)){
-			throw new IllegalStateException("Langage '"+lang
-					+ "' as annotated for ContentItem "
-				    + ci.getUri() + " is not supported by this Engine: "
-				    + "This is also checked in the canEnhance method! -> This "
-					+ "indicated an Bug in the implementation of the "
-					+ "EnhancementJobManager!");
-		} else {
-			language = lang == null || lang.isEmpty() ? null : new Language(lang);
-		}
-		Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(ci,
-				SUPPORTED_MIMTYPES);
-		if (contentPart == null) {
-			throw new IllegalStateException(
-					"No ContentPart with Mimetype '"
-							+ TEXT_PLAIN_MIMETYPE
-							+ "' found for ContentItem "
-							+ ci.getUri()
-							+ ": This is also checked in the canEnhance method! -> This "
-							+ "indicated an Bug in the implementation of the "
-							+ "EnhancementJobManager!");
-		}
-		String text;
-		try {
-			text = ContentItemHelper.getText(contentPart.getValue());
-		} catch (IOException e) {
-			throw new InvalidContentException(this, ci, e);
-		}
+		Language language = SpotlightEngineUtils.getContentLanguage(ci);
+		String text = SpotlightEngineUtils.getPlainContent(ci);
 
 		Collection<Annotation> dbpslGraph = doPostRequest(text,ci.getUri());
 		if (dbpslGraph != null) {
@@ -316,6 +212,8 @@ public class DBPSpotlightAnnotateEnhance
 		}
 	}
 
+
+
 	/**
 	 * This generates enhancement structures for the entities from DBPedia
 	 * Spotlight and adds them to the content item's metadata. For each entity a
@@ -329,62 +227,22 @@ public class DBPSpotlightAnnotateEnhance
 	 */
 	protected void createEnhancements(Collection<Annotation> occs,
 			ContentItem ci, String text, Language language) {
-		LiteralFactory literalFactory = LiteralFactory.getInstance();
-
-		HashMap<Resource, UriRef> entityAnnotationMap = new HashMap<Resource, UriRef>();
-
+		//we need to create multiple EntityAnnotations even for the same
+		//suggested Entity, as the scores will be different
+		//HashMap<Resource, UriRef> entityAnnotationMap = new HashMap<Resource, UriRef>();
 		for (Annotation occ : occs) {
-			UriRef textAnnotation = EnhancementEngineHelper
-					.createTextEnhancement(ci, this);
-			MGraph model = ci.getMetadata();
-			model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,
-					new PlainLiteralImpl(occ.surfaceForm, language)));
-			model.add(new TripleImpl(textAnnotation, ENHANCER_START,
-					literalFactory.createTypedLiteral(occ.offset)));
-			model.add(new TripleImpl(textAnnotation, ENHANCER_END,
-					literalFactory.createTypedLiteral(occ.offset
-							+ occ.surfaceForm.length())));
-			model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, 
-					new PlainLiteralImpl(
-							getSelectionContext(text, occ.surfaceForm, occ.offset),
-							language)));
-			if (entityAnnotationMap.containsKey(occ.uri)) {
-				model.add(new TripleImpl(entityAnnotationMap.get(occ.uri),
-						DC_RELATION, textAnnotation));
-			} else {
-				UriRef entityAnnotation = EnhancementEngineHelper
-						.createEntityEnhancement(ci, this);
-				log.info(" annotation: {} {}",occ.uri,occ.surfaceForm);
-				entityAnnotationMap.put(occ.uri, entityAnnotation);
-				Literal label = new PlainLiteralImpl(occ.surfaceForm,
-						new Language("en"));
-				model.add(new TripleImpl(entityAnnotation, DC_RELATION,
-						textAnnotation));
-				model.add(new TripleImpl(entityAnnotation,
-						ENHANCER_ENTITY_LABEL, label));
-				model.add(new TripleImpl(entityAnnotation,
-						ENHANCER_ENTITY_REFERENCE, occ.uri));
-				//set the fise:entity-type
-				for(String type : occ.getTypeNames()){
-					UriRef annotationType = new UriRef(type);
-					log.info(" > {}",annotationType);
-					model.add(new TripleImpl(entityAnnotation,
-							ENHANCER_ENTITY_TYPE, annotationType));
-				}
-				//set the dc:type of the fise:TextAnnotation if not yet done
-				List<String> dbpTypes = occ.getDbpediaTypeNames();
-				if(!dbpTypes.isEmpty() && !model.filter(textAnnotation, DC_TYPE, null).hasNext()){
-					//use the last of the dbpedia ontology type as they
-					//are sorted from the most specific to the most
-					//common one - the dc:type should be a common one
-					UriRef dcType = new UriRef(dbpTypes.get(dbpTypes.size()-1));
-					log.info(" dcType={}",dcType);
-					model.add(new TripleImpl(textAnnotation, DC_TYPE,
-							dcType));
-				}
+			UriRef textAnnotation = SpotlightEngineUtils.createTextEnhancement(
+					occ.surfaceForm, this, ci, text, language);
+
+//			if (entityAnnotationMap.containsKey(occ.uri)) {
+//				model.add(new TripleImpl(entityAnnotationMap.get(occ.uri),
+//						DC_RELATION, textAnnotation));
+//			} else {
+			SpotlightEngineUtils.createEntityAnnotation(occ, this, ci, textAnnotation, language);
+//				entityAnnotationMap.put(occ.uri, entityAnnotation);
 			}
 		}
-	}
+
 
 	/**
 	 * Sends a POST request to the DBpediaSpotlight endpoint.
@@ -484,80 +342,13 @@ public class DBPSpotlightAnnotateEnhance
 		} finally {
 			IOUtils.closeQuietly(is);
 		}
-		NodeList nlist = getElementsByTagName(xmlDoc, "Resource");
-		return getAnnotations(nlist);
+		return Annotation.parseAnnotations(xmlDoc);
 	}
 
-	/**
-	 * This method creates the Collection of Annotations, which the method
-	 * <code>createEnhancement</code> adds to the meta data of the content item.
-	 * 
-	 * @param nList
-	 *            NodeList of all Resources contained in the XML response from
-	 *            DBpedia Spotlight
-	 * @return a Collection<DBPSLAnnotation> with all annotations
-	 */
-	private Collection<Annotation> getAnnotations(NodeList nList) {
-		Collection<Annotation> dbpslAnnos = new HashSet<Annotation>();
 
-		for (int temp = 0; temp < nList.getLength(); temp++) {
-			Annotation dbpslann = new Annotation();
-			Element node = (Element) nList.item(temp);
-			dbpslann.uri = new UriRef(node.getAttribute("URI"));
-			dbpslann.support = (new Integer(node.getAttribute("support")))
-					.intValue();
-			dbpslann.types = node.getAttribute("types");
-			dbpslann.surfaceForm = node.getAttribute("surfaceForm");
-			dbpslann.offset = (new Integer(node.getAttribute("offset")))
-					.intValue();
-			dbpslann.similarityScore = (new Double(
-					node.getAttribute("similarityScore"))).doubleValue();
-			dbpslann.percentageOfSecondRank = (new Double(
-					node.getAttribute("percentageOfSecondRank"))).doubleValue();
-
-			dbpslAnnos.add(dbpslann);
-		}
-
-		return dbpslAnnos;
-	}
 
 	public Map<String, Object> getServiceProperties() {
 		return Collections.unmodifiableMap(Collections.singletonMap(
 				ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder));
 	}
-
-    private static final int DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE = 50;
-    /**
-     * Extracts the selection context based on the content, selection and
-     * the start char offset of the selection
-     * @param content the content
-     * @param selection the selected text
-     * @param selectionStartPos the start char position of the selection
-     * @return the context
-     */
-    protected static String getSelectionContext(String content, String selection,int selectionStartPos){
-        //extract the selection context
-        int beginPos;
-        if(selectionStartPos <= DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE){
-            beginPos = 0;
-        } else {
-            int start = selectionStartPos-DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
-            beginPos = content.indexOf(' ',start);
-            if(beginPos < 0 || beginPos >= selectionStartPos){ //no words
-                beginPos = start; //begin within a word
-            }
-        }
-        int endPos;
-        if(selectionStartPos+selection.length()+DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE >= content.length()){
-            endPos = content.length();
-        } else {
-            int start = selectionStartPos+selection.length()+DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
-            endPos = content.lastIndexOf(' ', start);
-            if(endPos <= selectionStartPos+selection.length()){
-                endPos = start; //end within a word;
-            }
-        }
-        return content.substring(beginPos, endPos);
-    }
-
 }

Copied: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java (from r1376420, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java)
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java?p2=incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java&p1=incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java&r1=1376420&r2=1376912&rev=1376912&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementEngine.java Fri Aug 24 13:48:52 2012
@@ -16,51 +16,36 @@
  */
 package org.apache.stanbol.enhancer.engines.dbpspotlight.candidates;
 
-import static org.apache.stanbol.enhancer.engines.dbpspotlight.candidates.XMLParser.getElementsByTagName;
-import static org.apache.stanbol.enhancer.engines.dbpspotlight.candidates.XMLParser.loadXMLFromInputStream;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_CONFIDENCE;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_DISAMBIGUATOR;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_RESTRICTION;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_SPARQL;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_SPOTTER;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_SUPPORT;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_URL_KEY;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.UTF8;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.utils.XMLParser.loadXMLFromInputStream;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_LABEL;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_CONTEXT;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
 
-import java.io.BufferedReader;
 import java.io.BufferedWriter;
 import java.io.ByteArrayOutputStream;
-import java.io.DataOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
 import java.io.UnsupportedEncodingException;
 import java.net.HttpURLConnection;
-import java.net.MalformedURLException;
 import java.net.URL;
 import java.net.URLEncoder;
-import java.nio.charset.Charset;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Dictionary;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
 
 import org.apache.clerezza.rdf.core.Language;
-import org.apache.clerezza.rdf.core.Literal;
-import org.apache.clerezza.rdf.core.LiteralFactory;
 import org.apache.clerezza.rdf.core.MGraph;
-import org.apache.clerezza.rdf.core.NonLiteral;
-import org.apache.clerezza.rdf.core.Resource;
-import org.apache.clerezza.rdf.core.Triple;
 import org.apache.clerezza.rdf.core.UriRef;
-import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
 import org.apache.clerezza.rdf.core.impl.TripleImpl;
 import org.apache.clerezza.rdf.core.serializedform.Serializer;
 import org.apache.commons.io.IOUtils;
@@ -71,23 +56,19 @@ import org.apache.felix.scr.annotations.
 import org.apache.felix.scr.annotations.Service;
 import org.apache.stanbol.commons.stanboltools.offline.OfflineMode;
 import org.apache.stanbol.commons.stanboltools.offline.OnlineMode;
-import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.model.CandidateResource;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.model.SurfaceForm;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.utils.SpotlightEngineUtils;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
-import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
 import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
-import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
-import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.osgi.service.cm.ConfigurationException;
 import org.osgi.service.component.ComponentContext;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.Node;
-import org.w3c.dom.NodeList;
 import org.xml.sax.SAXException;
 
 /**
@@ -98,33 +79,20 @@ import org.xml.sax.SAXException;
  */
 @Component(metatype = true, immediate = true, label = "%stanbol.DBPSpotlightCandidatesEnhancementEngine.name", description = "%stanbol.DBPSpotlightCandidatesEnhancementEngine.description")
 @Service
-@Properties(value = { @Property(name = EnhancementEngine.PROPERTY_NAME, value = "dbpspotlightcandidates") })
+@Properties(value = { 
+		@Property(name = EnhancementEngine.PROPERTY_NAME, value = "dbpspotlightcandidates"),
+		@Property(name = PARAM_URL_KEY, value = "http://spotlight.dbpedia.org/rest/candidates"),
+		@Property(name = PARAM_SPOTTER),
+		@Property(name = PARAM_DISAMBIGUATOR),
+		@Property(name = PARAM_RESTRICTION),
+		@Property(name = PARAM_SPARQL),
+		@Property(name = PARAM_SUPPORT),
+		@Property(name = PARAM_CONFIDENCE)
+})
 public class DBPSpotlightCandidatesEnhancementEngine extends
 		AbstractEnhancementEngine<IOException, RuntimeException> implements
 		EnhancementEngine, ServiceProperties {
 
-	/** a configurable value of the text segment length to check */
-	@Property(value = "http://spotlight.dbpedia.org/rest/candidates")
-	public static final String SL_URL_KEY = "stanbol.DBPSpotlightCandidatesEnhancementEngine.url";
-
-	@Property(value = "LingPipeSpotter")
-	public static final String SL_SPOTTER = "stanbol.DBPSpotlightCandidatesEnhancementEngine.spotter";
-
-	@Property(value = "")
-	public static final String SL_DISAMBIGUATOR = "stanbol.DBPSpotlightCandidatesEnhancementEngine.disambiguator";
-
-	@Property()
-	public static final String SL_RESTRICTION = "stanbol.DBPSpotlightCandidatesEnhancementEngine.types";
-
-	@Property()
-	public static final String SL_SPARQL = "stanbol.DBPSpotlightCandidatesEnhancementEngine.sparql";
-
-	@Property()
-	public static final String SL_SUPPORT = "stanbol.DBPSpotlightCandidatesEnhancementEngine.support";
-
-	@Property()
-	public static final String SL_CONFIDENCE = "stanbol.DBPSpotlightCandidatesEnhancementEngine.confidence";
-
 	/**
 	 * Ensures this engine is deactivated in {@link OfflineMode}
 	 */
@@ -138,43 +106,6 @@ public class DBPSpotlightCandidatesEnhan
 	 */
 	public static final Integer defaultOrder = ORDERING_CONTENT_EXTRACTION - 35;
 
-	private static final Charset UTF8 = Charset.forName("UTF-8");
-	/**
-	 * This contains the only MIME type directly supported by this enhancement
-	 * engine.
-	 */
-	private static final String TEXT_PLAIN_MIMETYPE = "text/plain";
-	/**
-	 * This contains a list of languages supported by DBpedia Spotlight. If the
-	 * metadata doesn't contain a value for the language as the value of the
-	 * {@link Property.DC_LANG property} the content can't be processed.
-	 */
-	protected static final Set<String> SUPPORTED_LANGUAGES = Collections
-			.unmodifiableSet(new HashSet<String>(Arrays.asList("en")));
-	/**
-	 * This contains the only MIME type directly supported by this enhancement
-	 * engine.
-	 */
-	private static final String SPOTLIGHT_NAME_SPACE = "http://spotlight.dbpedia.org/ns/";
-	
-	/*
-	 * Definition of some Spotlight specific properties added to
-	 * fise:EntityAnnotations created by this Engine
-	 */
-	public static final UriRef SPOTLIGHT_CONTEXTUAL_SCORE = new UriRef(
-			SPOTLIGHT_NAME_SPACE + "contextualScore");
-	public static final UriRef SPOTLIGHT_PERCENTAGE_OF_SECOND_RANK = new UriRef(
-			SPOTLIGHT_NAME_SPACE + "percentageOfSecondRank");
-	public static final UriRef SPOTLIGHT_SUPPORT = new UriRef(
-			SPOTLIGHT_NAME_SPACE + "support");
-	public static final UriRef SPOTLIGHT_PRIOR_SCORE = new UriRef(
-			SPOTLIGHT_NAME_SPACE + "priorScore");
-	public static final UriRef SPOTLIGHT_FINAL_SCORE = new UriRef(
-			SPOTLIGHT_NAME_SPACE + "finalScore");
-	
-	/** Set containing the only supported mime type {@link #TEXT_PLAIN_MIMETYPE} */
-	private static final Set<String> SUPPORTED_MIMTYPES = Collections
-			.singleton(TEXT_PLAIN_MIMETYPE);
 
 	/** This contains the logger. */
 	private static final Logger log = LoggerFactory
@@ -224,32 +155,19 @@ public class DBPSpotlightCandidatesEnhan
 		// TODO initialize Extractor
 		Dictionary<String, Object> properties = ce.getProperties();
 		//parse the URL of the RESTful service
-		Object value = properties.get(SL_URL_KEY);
-		if(value == null || value.toString().isEmpty()){
-			throw new ConfigurationException(SL_URL_KEY, "The URL with the DBpedia "
-					+ "Spotlight Spot RESTful Service MUST NOT be NULL nor empty!");
-		} else {
-			String url = (String) properties.get(SL_URL_KEY);
-			try {
-				this.spotlightUrl = new URL(url);
-			} catch (MalformedURLException e) {
-				throw new ConfigurationException(SL_URL_KEY, "The parsed URL for the "
-						+ "DBpedia Spotlight Spot RESTful Service is illegal formatted!",
-						e);
-			}
-		}
-		spotlightSpotter = properties.get(SL_SPOTTER) == null ? null
-				: (String) properties.get(SL_SPOTTER);
-		spotlightDisambiguator = properties.get(SL_DISAMBIGUATOR) == null ? null
-				: (String) properties.get(SL_DISAMBIGUATOR);
-		spotlightTypesRestriction = properties.get(SL_RESTRICTION) == null ? null
-				: (String) properties.get(SL_RESTRICTION);
-		spotlightSparql = properties.get(SL_SPARQL) == null ? null
-				: (String) properties.get(SL_SPARQL);
-		spotlightSupport = properties.get(SL_SUPPORT) == null ? null
-				: (String) properties.get(SL_SUPPORT);
-		spotlightConfidence = properties.get(SL_CONFIDENCE) == null ? null
-				: (String) properties.get(SL_CONFIDENCE);
+		spotlightUrl = SpotlightEngineUtils.parseSpotlightServiceURL(properties);
+		spotlightSpotter = properties.get(PARAM_SPOTTER) == null ? null
+				: (String) properties.get(PARAM_SPOTTER);
+		spotlightDisambiguator = properties.get(PARAM_DISAMBIGUATOR) == null ? null
+				: (String) properties.get(PARAM_DISAMBIGUATOR);
+		spotlightTypesRestriction = properties.get(PARAM_RESTRICTION) == null ? null
+				: (String) properties.get(PARAM_RESTRICTION);
+		spotlightSparql = properties.get(PARAM_SPARQL) == null ? null
+				: (String) properties.get(PARAM_SPARQL);
+		spotlightSupport = properties.get(PARAM_SUPPORT) == null ? null
+				: (String) properties.get(PARAM_SUPPORT);
+		spotlightConfidence = properties.get(PARAM_CONFIDENCE) == null ? null
+				: (String) properties.get(PARAM_CONFIDENCE);
 	}
 
 	/**
@@ -259,18 +177,8 @@ public class DBPSpotlightCandidatesEnhan
 	 *            the {@link ContentItem}
 	 */
 	public int canEnhance(ContentItem ci) throws EngineException {
-		if (ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) != null) {
-			String language = EnhancementEngineHelper.getLanguage(ci);
-			if (!SUPPORTED_LANGUAGES.contains(language)) {
-				log.info("DBpedia Spotlight can not process ContentItem {} "
-						+ "because language {} is not supported (supported: {})",
-						new Object[] { ci.getUri(), language, SUPPORTED_LANGUAGES });
-				return CANNOT_ENHANCE;
-			}
-			return ENHANCE_ASYNC;
-		} else {
-			return CANNOT_ENHANCE;
-		}
+		return SpotlightEngineUtils.canProcess(ci) ?
+				ENHANCE_ASYNC : CANNOT_ENHANCE;
 	}
 
 	/**
@@ -281,36 +189,8 @@ public class DBPSpotlightCandidatesEnhan
 	 *            the {@link ContentItem}
 	 */
 	public void computeEnhancements(ContentItem ci) throws EngineException {
-		Language language;
-		String lang = EnhancementEngineHelper.getLanguage(ci);
-		if(!SUPPORTED_LANGUAGES.contains(lang)){
-			throw new IllegalStateException("Langage '"+lang
-					+ "' as annotated for ContentItem "
-				    + ci.getUri() + " is not supported by this Engine: "
-				    + "This is also checked in the canEnhance method! -> This "
-					+ "indicated an Bug in the implementation of the "
-					+ "EnhancementJobManager!");
-		} else {
-			language = lang == null || lang.isEmpty() ? null : new Language(lang);
-		}
-		Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(ci,
-				SUPPORTED_MIMTYPES);
-		if (contentPart == null) {
-			throw new IllegalStateException(
-					"No ContentPart with Mimetype '"
-							+ TEXT_PLAIN_MIMETYPE
-							+ "' found for ContentItem "
-							+ ci.getUri()
-							+ ": This is also checked in the canEnhance method! -> This "
-							+ "indicated an Bug in the implementation of the "
-							+ "EnhancementJobManager!");
-		}
-		String text;
-		try {
-			text = ContentItemHelper.getText(contentPart.getValue());
-		} catch (IOException e) {
-			throw new InvalidContentException(this, ci, e);
-		}
+		Language language = SpotlightEngineUtils.getContentLanguage(ci);
+		String text = SpotlightEngineUtils.getPlainContent(ci);
 
 		Collection<SurfaceForm> dbpslGraph = doPostRequest(text,ci.getUri());
 		if (dbpslGraph != null) {
@@ -349,55 +229,20 @@ public class DBPSpotlightCandidatesEnhan
 	 */
 	protected void createEnhancements(Collection<SurfaceForm> occs,
 			ContentItem ci, String text, Language language) {
-		LiteralFactory literalFactory = LiteralFactory.getInstance();
 
 		// TODO create TextEnhancement (form, start, end, type?)
 		HashMap<String, UriRef> entityAnnotationMap = new HashMap<String, UriRef>();
 
 		MGraph model = ci.getMetadata();
 		for (SurfaceForm occ : occs) {
-			UriRef textAnnotation = EnhancementEngineHelper
-					.createTextEnhancement(ci, this);
-			// model.add(new TripleImpl(textAnnotation, DC_TYPE, new UriRef(
-			// occ.types )));
-			// for autotagger use the name instead of the matched term (that
-			// might be a pronoun!)
-			model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,
-					new PlainLiteralImpl(occ.name, language)));
-			model.add(new TripleImpl(textAnnotation, ENHANCER_START,
-					literalFactory.createTypedLiteral(occ.offset)));
-			model.add(new TripleImpl(textAnnotation, ENHANCER_END,
-					literalFactory.createTypedLiteral(occ.offset
-							+ occ.name.length())));
-			model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, 
-					new PlainLiteralImpl(
-							getSelectionContext(text, occ.name, occ.offset),
-							language)));
-
+			UriRef textAnnotation = SpotlightEngineUtils.createTextEnhancement(
+					occ, this, ci, text, language);
 			Iterator<CandidateResource> resources = occ.resources.iterator();
 			while (resources.hasNext()) {
 				CandidateResource resource = resources.next();
-				UriRef entityAnnotation = EnhancementEngineHelper
-						.createEntityEnhancement(ci, this);
+				UriRef entityAnnotation = SpotlightEngineUtils.createEntityAnnotation(
+						resource, this, ci, textAnnotation);
 				entityAnnotationMap.put(resource.uri, entityAnnotation);
-				Literal label = new PlainLiteralImpl(resource.label,
-						new Language("en"));
-				model.add(new TripleImpl(entityAnnotation, DC_RELATION,
-						textAnnotation));
-				model.add(new TripleImpl(entityAnnotation,
-						ENHANCER_ENTITY_LABEL, label));
-				model.add(new TripleImpl(entityAnnotation,
-						ENHANCER_ENTITY_REFERENCE, new UriRef(resource.uri)));
-				model.add(new TripleImpl(entityAnnotation, SPOTLIGHT_CONTEXTUAL_SCORE,
-						literalFactory.createTypedLiteral(resource.contextualScore)));
-				model.add(new TripleImpl(entityAnnotation,SPOTLIGHT_PERCENTAGE_OF_SECOND_RANK,
-						literalFactory.createTypedLiteral(resource.percentageOfSecondRank)));
-				model.add(new TripleImpl(entityAnnotation, SPOTLIGHT_SUPPORT, literalFactory
-						.createTypedLiteral(resource.support)));
-				model.add(new TripleImpl(entityAnnotation, SPOTLIGHT_PRIOR_SCORE, literalFactory
-						.createTypedLiteral(resource.priorScore)));
-				model.add(new TripleImpl(entityAnnotation, SPOTLIGHT_FINAL_SCORE, literalFactory
-						.createTypedLiteral(resource.finalScore)));
 			}
 			if (entityAnnotationMap.containsKey(occ.name)) {
 				model.add(new TripleImpl(entityAnnotationMap.get(occ.name),
@@ -505,62 +350,7 @@ public class DBPSpotlightCandidatesEnhan
 		} finally {
 			IOUtils.closeQuietly(is);
 		}
-		NodeList nlist = getElementsByTagName(xmlDoc,"surfaceForm");
-		Collection<SurfaceForm> annos = this.getAnnotations(nlist);
-		return annos;
-	}
-
-	/**
-	 * This method creates the Collection of surface forms, which the method
-	 * <code>createEnhancement</code> adds to the meta data of the content item
-	 * as TextAnnotations.
-	 * 
-	 * @param nList
-	 *            NodeList of all Resources contained in the XML response from
-	 *            DBpedia Spotlight
-	 * @return a Collection<DBPSLSurfaceForm> with all annotations
-	 */
-	private Collection<SurfaceForm> getAnnotations(NodeList nList) {
-		Collection<SurfaceForm> dbpslAnnos = new HashSet<SurfaceForm>();
-
-		for (int temp = 0; temp < nList.getLength(); temp++) {
-			SurfaceForm dbpslann = new SurfaceForm();
-			Element node = (Element) nList.item(temp);
-			dbpslann.name = node.getAttribute("name");
-			dbpslann.offset = (new Integer(node.getAttribute("offset")))
-					.intValue();
-			// dbpslann.type = node.getAttribute( "type" );
-
-			NodeList resources = node.getChildNodes();
-
-			for (int count = 0; count < resources.getLength(); count++) {
-				Node n = resources.item(count);
-				if (n instanceof Element) {
-					Element r = (Element) n;
-					CandidateResource resource = new CandidateResource();
-					resource.label = r.getAttribute("label");
-					resource.uri = r.getAttribute("uri");
-					resource.contextualScore = (new Double(
-							r.getAttribute("contextualScore"))).doubleValue();
-					resource.percentageOfSecondRank = (new Double(
-							r.getAttribute("percentageOfSecondRank")))
-							.doubleValue();
-					resource.support = (new Double(r.getAttribute("support")))
-							.doubleValue();
-					resource.priorScore = (new Double(
-							r.getAttribute("priorScore"))).doubleValue();
-					resource.finalScore = (new Double(
-							r.getAttribute("finalScore"))).doubleValue();
-					dbpslann.resources.add(resource);
-				}
-
-				// Element r = (Element) resources.item(count);
-			}
-
-			dbpslAnnos.add(dbpslann);
-		}
-
-		return dbpslAnnos;
+		return CandidateResource.parseCandidates(xmlDoc);
 	}
 
 	public Map<String, Object> getServiceProperties() {
@@ -568,38 +358,5 @@ public class DBPSpotlightCandidatesEnhan
 				ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder));
 	}
 
-    private static final int DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE = 50;
-    /**
-     * Extracts the selection context based on the content, selection and
-     * the start char offset of the selection
-     * @param content the content
-     * @param selection the selected text
-     * @param selectionStartPos the start char position of the selection
-     * @return the context
-     */
-    protected static String getSelectionContext(String content, String selection,int selectionStartPos){
-        //extract the selection context
-        int beginPos;
-        if(selectionStartPos <= DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE){
-            beginPos = 0;
-        } else {
-            int start = selectionStartPos-DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
-            beginPos = content.indexOf(' ',start);
-            if(beginPos < 0 || beginPos >= selectionStartPos){ //no words
-                beginPos = start; //begin within a word
-            }
-        }
-        int endPos;
-        if(selectionStartPos+selection.length()+DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE >= content.length()){
-            endPos = content.length();
-        } else {
-            int start = selectionStartPos+selection.length()+DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
-            endPos = content.lastIndexOf(' ', start);
-            if(endPos <= selectionStartPos+selection.length()){
-                endPos = start; //end within a word;
-            }
-        }
-        return content.substring(beginPos, endPos);
-    }
 
 }

Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementEngine.java?rev=1376912&r1=1376397&r2=1376912&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementEngine.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementEngine.java Fri Aug 24 13:48:52 2012
@@ -16,7 +16,14 @@
  */
 package org.apache.stanbol.enhancer.engines.dbpspotlight.disambiguate;
 
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_CONFIDENCE;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_DISAMBIGUATOR;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_RESTRICTION;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_SPARQL;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_SUPPORT;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_URL_KEY;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.UTF8;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.utils.XMLParser.loadXMLFromInputStream;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_LABEL;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
@@ -25,12 +32,11 @@ import static org.apache.stanbol.enhance
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
 
-import java.io.BufferedReader;
+import java.io.BufferedWriter;
 import java.io.ByteArrayOutputStream;
-import java.io.DataOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
 import java.io.UnsupportedEncodingException;
 import java.net.HttpURLConnection;
 import java.net.URL;
@@ -39,44 +45,42 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.Dictionary;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.Hashtable;
 import java.util.Iterator;
 import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
 
 import org.apache.clerezza.rdf.core.Language;
 import org.apache.clerezza.rdf.core.Literal;
 import org.apache.clerezza.rdf.core.MGraph;
-import org.apache.clerezza.rdf.core.NonLiteral;
 import org.apache.clerezza.rdf.core.Resource;
 import org.apache.clerezza.rdf.core.Triple;
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
 import org.apache.clerezza.rdf.core.impl.TripleImpl;
 import org.apache.clerezza.rdf.core.serializedform.Serializer;
+import org.apache.commons.io.IOUtils;
 import org.apache.felix.scr.annotations.Component;
 import org.apache.felix.scr.annotations.Properties;
 import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Reference;
 import org.apache.felix.scr.annotations.Service;
-import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.commons.stanboltools.offline.OfflineMode;
+import org.apache.stanbol.commons.stanboltools.offline.OnlineMode;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.model.Annotation;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.utils.SpotlightEngineUtils;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
-import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
-import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
-import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
 import org.osgi.service.cm.ConfigurationException;
 import org.osgi.service.component.ComponentContext;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
 
 /**
  * {@link DBPSpotlightDisambiguateEnhancementEngine} provides functionality to
@@ -84,31 +88,29 @@ import org.w3c.dom.NodeList;
  * 
  * @author Iavor Jelev, Babelmonkeys (GzEvD)
  */
-@Component(metatype = true, immediate = true, label = "%stanbol.DBPSpotlightDisambiguateEnhancementEngine.name", description = "%stanbol.DBPSpotlightDisambiguateEnhancementEngine.description")
+@Component(metatype = true, immediate = true, 
+	label = "%stanbol.DBPSpotlightDisambiguateEnhancementEngine.name", 
+	description = "%stanbol.DBPSpotlightDisambiguateEnhancementEngine.description")
 @Service
-@Properties(value = { @Property(name = EnhancementEngine.PROPERTY_NAME, value = "dbpspotlightdisambiguate") })
+@Properties(value = { 
+		@Property(name = EnhancementEngine.PROPERTY_NAME, value = "dbpspotlightdisambiguate"),
+		@Property(name = PARAM_URL_KEY, value = "http://spotlight.dbpedia.org/rest/annotate"),
+		@Property(name = PARAM_DISAMBIGUATOR, value = "Document"),
+		@Property(name = PARAM_RESTRICTION),
+		@Property(name = PARAM_SPARQL),
+		@Property(name = PARAM_SUPPORT),
+		@Property(name = PARAM_CONFIDENCE)
+})
 public class DBPSpotlightDisambiguateEnhancementEngine extends
 		AbstractEnhancementEngine<IOException, RuntimeException> implements
 		EnhancementEngine, ServiceProperties {
 
-	// all parameters which can be used to configure the EnhancementEngine
-	@Property(value = "http://spotlight.dbpedia.org/rest/annotate")
-	public static final String SL_URL_KEY = "stanbol.DBPSpotlightDisambiguateEnhancementEngine.url";
-
-	@Property(value = "Document")
-	public static final String SL_DISAMBIGUATOR = "stanbol.DBPSpotlightDisambiguateEnhancementEngine.disambiguator";
-
-	@Property()
-	public static final String SL_RESTRICTION = "stanbol.DBPSpotlightDisambiguateEnhancementEngine.types";
-
-	@Property()
-	public static final String SL_SPARQL = "stanbol.DBPSpotlightDisambiguateEnhancementEngine.sparql";
-
-	@Property()
-	public static final String SL_SUPPORT = "stanbol.DBPSpotlightDisambiguateEnhancementEngine.support";
-
-	@Property()
-	public static final String SL_CONFIDENCE = "stanbol.DBPSpotlightDisambiguateEnhancementEngine.confidence";
+	/**
+	 * Ensures this engine is deactivated in {@link OfflineMode}
+	 */
+	@SuppressWarnings("unused")
+	@Reference
+	private OnlineMode onlineMode;
 
 	/**
 	 * The default value for the Execution of this Engine. Currently set to
@@ -116,19 +118,11 @@ public class DBPSpotlightDisambiguateEnh
 	 */
 	public static final Integer defaultOrder = ORDERING_CONTENT_EXTRACTION - 31;
 
-	/**
-	 * This contains the only MIME type directly supported by this enhancement
-	 * engine.
-	 */
-	private static final String TEXT_PLAIN_MIMETYPE = "text/plain";
-	/** Set containing the only supported mime type {@link #TEXT_PLAIN_MIMETYPE} */
-	private static final Set<String> SUPPORTED_MIMTYPES = Collections
-			.singleton(TEXT_PLAIN_MIMETYPE);
 	/** This contains the logger. */
 	private static final Logger log = LoggerFactory
 			.getLogger(DBPSpotlightDisambiguateEnhancementEngine.class);
 	/** holds the url of the Spotlight REST endpoint */
-	private String spotlightUrl;
+	private URL spotlightUrl;
 	/** holds the chosen of disambiguator to be used */
 	private String spotlightDisambiguator;
 	/** holds the type restriction for the results, if the user wishes one */
@@ -144,7 +138,20 @@ public class DBPSpotlightDisambiguateEnh
 	 * Spotlight, and later for linking of the results
 	 */
 	private Hashtable<String, UriRef> textAnnotationsMap;
-
+	/**
+	 * Default constructor used by OSGi. It is expected that
+	 * {@link #activate(ComponentContext)} is called before
+	 * using the instance.
+	 */
+	public DBPSpotlightDisambiguateEnhancementEngine(){}
+	
+	/**
+	 * Constructor intended to be used for unit tests.
+	 * @param serviceURL the URL of the Spotlight REST endpoint to use
+	 */
+	protected DBPSpotlightDisambiguateEnhancementEngine(URL serviceURL){
+		this.spotlightUrl = serviceURL;
+	}
 	/**
 	 * Initialize all parameters from the configuration panel, or with their
 	 * default values
@@ -159,18 +166,17 @@ public class DBPSpotlightDisambiguateEnh
 		super.activate(ce);
 
 		Dictionary<String, Object> properties = ce.getProperties();
-		spotlightUrl = properties.get(SL_URL_KEY) == null ? "http://spotlight.dbpedia.org/rest/annotate"
-				: (String) properties.get(SL_URL_KEY);
-		spotlightDisambiguator = properties.get(SL_DISAMBIGUATOR) == null ? null
-				: (String) properties.get(SL_DISAMBIGUATOR);
-		spotlightTypesRestriction = properties.get(SL_RESTRICTION) == null ? null
-				: (String) properties.get(SL_RESTRICTION);
-		spotlightSparql = properties.get(SL_SPARQL) == null ? null
-				: (String) properties.get(SL_SPARQL);
-		spotlightSupport = properties.get(SL_SUPPORT) == null ? "-1"
-				: (String) properties.get(SL_SUPPORT);
-		spotlightConfidence = properties.get(SL_CONFIDENCE) == null ? "-1"
-				: (String) properties.get(SL_CONFIDENCE);
+		spotlightUrl = SpotlightEngineUtils.parseSpotlightServiceURL(properties);
+		spotlightDisambiguator = properties.get(PARAM_DISAMBIGUATOR) == null ? null
+				: (String) properties.get(PARAM_DISAMBIGUATOR);
+		spotlightTypesRestriction = properties.get(PARAM_RESTRICTION) == null ? null
+				: (String) properties.get(PARAM_RESTRICTION);
+		spotlightSparql = properties.get(PARAM_SPARQL) == null ? null
+				: (String) properties.get(PARAM_SPARQL);
+		spotlightSupport = properties.get(PARAM_SUPPORT) == null ? "-1"
+				: (String) properties.get(PARAM_SUPPORT);
+		spotlightConfidence = properties.get(PARAM_CONFIDENCE) == null ? "-1"
+				: (String) properties.get(PARAM_CONFIDENCE);
 	}
 
 	/**
@@ -180,11 +186,8 @@ public class DBPSpotlightDisambiguateEnh
 	 *            the {@link ContentItem}
 	 */
 	public int canEnhance(ContentItem ci) throws EngineException {
-		if (ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) != null) {
-			return ENHANCE_SYNCHRONOUS;
-		} else {
-			return CANNOT_ENHANCE;
-		}
+		return SpotlightEngineUtils.canProcess(ci) ?
+				ENHANCE_ASYNC : CANNOT_ENHANCE;
 	}
 
 	/**
@@ -195,37 +198,21 @@ public class DBPSpotlightDisambiguateEnh
 	 *            the {@link ContentItem}
 	 */
 	public void computeEnhancements(ContentItem ci) throws EngineException {
-		Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(ci,
-				SUPPORTED_MIMTYPES);
-		if (contentPart == null) {
-			throw new IllegalStateException(
-					"No ContentPart with Mimetype '"
-							+ TEXT_PLAIN_MIMETYPE
-							+ "' found for ContentItem "
-							+ ci.getUri()
-							+ ": This is also checked in the canEnhance method! -> This "
-							+ "indicated an Bug in the implementation of the "
-							+ "EnhancementJobManager!");
-		}
-		String text = "";
-		try {
-			text = ContentItemHelper.getText(contentPart.getValue());
+		Language language = SpotlightEngineUtils.getContentLanguage(ci);
+		String text = SpotlightEngineUtils.getPlainContent(ci);
 
-		} catch (IOException e) {
-			throw new InvalidContentException(this, ci, e);
-		}
 
 		// Retrieve the existing text annotations (requires read lock)
 		MGraph graph = ci.getMetadata();
 		String xmlTextAnnotations = this.getSpottedXml(text, graph);
 		Collection<Annotation> dbpslGraph = doPostRequest(text,
-				xmlTextAnnotations);
+				xmlTextAnnotations, ci.getUri());
 		if (dbpslGraph != null) {
 			// Acquire a write lock on the ContentItem when adding the
 			// enhancements
 			ci.getLock().writeLock().lock();
 			try {
-				createEnhancements(dbpslGraph, ci);
+				createEnhancements(dbpslGraph, ci, language);
 				if (log.isDebugEnabled()) {
 					Serializer serializer = Serializer.getInstance();
 					ByteArrayOutputStream debugStream = new ByteArrayOutputStream();
@@ -255,17 +242,7 @@ public class DBPSpotlightDisambiguateEnh
 	 *            the content item
 	 */
 	public void createEnhancements(Collection<Annotation> occs,
-			ContentItem ci) {
-		final Language language; // used for plain literals representing parts
-									// fo the content
-		String langString = getMetadataLanguage(ci.getMetadata(), null);
-
-		if (langString != null && !langString.isEmpty()) {
-			language = new Language(langString);
-		} else {
-			language = null;
-		}
-
+			ContentItem ci, Language language) {
 		HashMap<Resource, UriRef> entityAnnotationMap = new HashMap<Resource, UriRef>();
 
 		for (Annotation occ : occs) {
@@ -276,13 +253,13 @@ public class DBPSpotlightDisambiguateEnh
 				UriRef entityAnnotation = EnhancementEngineHelper
 						.createEntityEnhancement(ci, this);
 				entityAnnotationMap.put(occ.uri, entityAnnotation);
-				Literal label = new PlainLiteralImpl(occ.surfaceForm, language);
+				Literal label = new PlainLiteralImpl(occ.surfaceForm.name, language);
 				model.add(new TripleImpl(entityAnnotation, DC_RELATION,
 						textAnnotation));
 				model.add(new TripleImpl(entityAnnotation,
 						ENHANCER_ENTITY_LABEL, label));
 
-				HashSet<String> t = occ.getTypeNames();
+				Collection<String> t = occ.getTypeNames();
 				if (t != null) {
 					Iterator<String> it = t.iterator();
 					while (it.hasNext())
@@ -302,52 +279,18 @@ public class DBPSpotlightDisambiguateEnh
 	 *            a <code>String</code> with the text to be analyzed
 	 * @param xmlTextAnnotations
 	 * @param textAnnotations
+	 * @param contentItemUri the URI of the {@link ContentItem} (only
+	 * used for logging in case of an error)
 	 * @return a <code>String</code> with the server response
 	 * @throws EngineException
 	 *             if the request cannot be sent
 	 */
-	public Collection<Annotation> doPostRequest(String text,
-			String xmlTextAnnotations) throws EngineException {
-		StringBuilder data = new StringBuilder();
-
-		try {
-			data.append(URLEncoder.encode("spotter=SpotXmlParser", "UTF-8")
-					+ "&");
-			if (spotlightDisambiguator != null
-					&& !spotlightDisambiguator.isEmpty())
-				data.append(URLEncoder.encode("disambiguator", "UTF-8") + "="
-						+ URLEncoder.encode(spotlightDisambiguator, "UTF-8")
-						+ "&");
-			if (spotlightTypesRestriction != null
-					&& !spotlightTypesRestriction.isEmpty())
-				data.append(URLEncoder.encode("types", "UTF-8") + "="
-						+ URLEncoder.encode(spotlightTypesRestriction, "UTF-8")
-						+ "&");
-			if (spotlightSupport != null && !spotlightSupport.isEmpty())
-				data.append(URLEncoder.encode("support", "UTF-8") + "="
-						+ URLEncoder.encode(spotlightSupport, "UTF-8") + "&");
-			if (spotlightConfidence != null && !spotlightConfidence.isEmpty())
-				data.append(URLEncoder.encode("confidence", "UTF-8") + "="
-						+ URLEncoder.encode(spotlightConfidence, "UTF-8") + "&");
-			if (spotlightSparql != null && !spotlightSparql.isEmpty()
-					&& spotlightTypesRestriction == null)
-				data.append(URLEncoder.encode("sparql", "UTF-8") + "="
-						+ URLEncoder.encode(spotlightSparql, "UTF-8") + "&");
-			data.append(URLEncoder.encode("text", "UTF-8") + "="
-					+ URLEncoder.encode(xmlTextAnnotations, "UTF-8"));
-		} catch (UnsupportedEncodingException e) {
-			throw new EngineException(
-					"Data for the httprequest could not be converted. Error: "
-							+ e.getMessage());
-		}
-
+	protected Collection<Annotation> doPostRequest(String text,
+			String xmlTextAnnotations, UriRef contentItemUri) throws EngineException {
 		HttpURLConnection connection = null;
-		StringBuffer response = new StringBuffer();
-
+		BufferedWriter wr = null;
 		try {
-			// Create connection
-			URL url = new URL(spotlightUrl);
-			connection = (HttpURLConnection) url.openConnection();
+			connection = (HttpURLConnection) spotlightUrl.openConnection();
 			connection.setRequestMethod("POST");
 			connection.setRequestProperty("Content-Type",
 					"application/x-www-form-urlencoded");
@@ -358,45 +301,74 @@ public class DBPSpotlightDisambiguateEnh
 			connection.setDoOutput(true);
 
 			// Send request
-			DataOutputStream wr = new DataOutputStream(
-					connection.getOutputStream());
-			wr.writeBytes(data.toString());
-			wr.flush();
-			wr.close();
+			wr = new BufferedWriter(new OutputStreamWriter(
+					connection.getOutputStream(),UTF8));
+		} catch (IOException e) {
+			IOUtils.closeQuietly(wr);
+			throw new EngineException("Unable to open connection to "+
+					spotlightUrl,e);
+		}
+		try {
 
-			// Get Response
-			InputStream is = connection.getInputStream();
-			BufferedReader rd = new BufferedReader(new InputStreamReader(is));
-			String line;
-			while ((line = rd.readLine()) != null) {
-				response.append(line);
-				response.append('\r');
+			wr.write("spotter=SpotXmlParser&");
+			if (spotlightDisambiguator != null
+					&& !spotlightDisambiguator.isEmpty()){
+				wr.write("disambiguator=");
+				wr.write(URLEncoder.encode(spotlightDisambiguator, "UTF-8"));
+				wr.write('&');
 			}
-			rd.close();
-
-		} catch (Exception e) {
-			log.error("[request - error] The following error occurred: "
-					+ e.getMessage());
-
-		} finally {
-
-			if (connection != null) {
-				connection.disconnect();
+			if (spotlightTypesRestriction != null
+					&& !spotlightTypesRestriction.isEmpty()){
+				wr.write("types=");
+				wr.write(URLEncoder.encode(spotlightTypesRestriction, "UTF-8"));
+				wr.write('&');
+			}
+			if (spotlightSupport != null && !spotlightSupport.isEmpty()) {
+				wr.write("support=");
+				wr.write(URLEncoder.encode(spotlightSupport, "UTF-8"));
+				wr.write('&');
+			}
+			if (spotlightConfidence != null && !spotlightConfidence.isEmpty()){
+				wr.write("confidence=");
+				wr.write(URLEncoder.encode(spotlightConfidence, "UTF-8"));
+				wr.write('&');
+			}
+			if (spotlightSparql != null && !spotlightSparql.isEmpty()
+					&& spotlightTypesRestriction == null) {
+				wr.write("sparql=");
+				wr.write(URLEncoder.encode(spotlightSparql, "UTF-8"));
+				wr.write('&');
 			}
+			wr.write("text=");
+			wr.write(URLEncoder.encode(xmlTextAnnotations, "UTF-8"));
+		} catch (UnsupportedEncodingException e) {
+			throw new IllegalStateException(
+					"The platform does not support encoding " + UTF8.name(),e);
+		} catch (IOException e) {
+			throw new EngineException("Unable to write 'plain/text' content "
+					+ "for ContentItem "+contentItemUri+" to "
+					+ spotlightUrl,e);
+		} finally {
+			IOUtils.closeQuietly(wr);
 		}
-
-		XMLParser xmlParser = new XMLParser();
+		InputStream is = null;
+		Document xmlDoc;
 		try {
-			Document xmlDoc = xmlParser.loadXMLFromString(response.toString());
-			NodeList nlist = xmlParser.getElementsByTagName(xmlDoc, "Resource");
-			Collection<Annotation> annos = this.getAnnotations(nlist);
-
-			return annos;
-		} catch (Exception e) {
-			throw new EngineException(
-					"Response XML could not be parsed. Error: "
-							+ e.getMessage());
+			// Get Response
+			 is = connection.getInputStream();
+			xmlDoc = loadXMLFromInputStream(is);
+		} catch (IOException e) {
+			throw new EngineException("Unable to spot Entities with"
+					+ "Dbpedia Spotlight Annotate RESTful Serice running at "
+					+ spotlightUrl,e);
+		} catch(SAXException e) {
+			throw new EngineException("Unable to parse Response from "
+					+ "Dbpedia Spotlight Annotate RESTful Serice running at "
+					+ spotlightUrl,e);
+		} finally {
+			IOUtils.closeQuietly(is);
 		}
+		return Annotation.parseAnnotations(xmlDoc);
 	}
 
 	private String getSpottedXml(String text, MGraph graph) {
@@ -427,71 +399,10 @@ public class DBPSpotlightDisambiguateEnh
 		return xml.append("</annotation>").toString();
 	}
 
-	/**
-	 * This method creates the Collection of Annotations, which the method
-	 * <code>createEnhancement</code> adds to the meta data of the content item.
-	 * 
-	 * @param nList
-	 *            NodeList of all Resources contained in the XML response from
-	 *            DBpedia Spotlight
-	 * @return a Collection<DBPSLAnnotation> with all annotations
-	 */
-	private Collection<Annotation> getAnnotations(NodeList nList) {
-		Collection<Annotation> dbpslAnnos = new HashSet<Annotation>();
-
-		for (int temp = 0; temp < nList.getLength(); temp++) {
-			Annotation dbpslann = new Annotation();
-			Element node = (Element) nList.item(temp);
-			dbpslann.uri = new UriRef(node.getAttribute("URI"));
-			dbpslann.support = (new Integer(node.getAttribute("support")))
-					.intValue();
-			dbpslann.types = node.getAttribute("types");
-			dbpslann.surfaceForm = node.getAttribute("surfaceForm");
-			dbpslann.offset = (new Integer(node.getAttribute("offset")))
-					.intValue();
-			dbpslann.similarityScore = (new Double(
-					node.getAttribute("similarityScore"))).doubleValue();
-			dbpslann.percentageOfSecondRank = (new Double(
-					node.getAttribute("percentageOfSecondRank"))).doubleValue();
-
-			dbpslAnnos.add(dbpslann);
-		}
-
-		return dbpslAnnos;
-	}
-
 	public Map<String, Object> getServiceProperties() {
 		return Collections.unmodifiableMap(Collections.singletonMap(
 				ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder));
 	}
 
-	public String getMetadataLanguage(MGraph model, NonLiteral subj) {
-		Iterator<Triple> it = model.filter(subj, DC_LANGUAGE, null);
-		if (it.hasNext()) {
-			Resource langNode = it.next().getObject();
-			return getLexicalForm(langNode);
-		}
-		return null;
-	}
-
-	public String getLexicalForm(Resource res) {
-		if (res == null) {
-			return null;
-		} else if (res instanceof Literal) {
-			return ((Literal) res).getLexicalForm();
-		} else {
-			return res.toString();
-		}
-	}
-
-	/**
-	 * This method is used by the test class to set the endpoint url
-	 * 
-	 * @param url
-	 *            String the url of the Spotlight endpoint
-	 */
-	public void setEndpointUrl(String url) {
-		spotlightUrl = url;
-	}
 
 }