You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/08/24 15:48:54 UTC

svn commit: r1376912 [2/2] - in /incubator/stanbol/branches/dbpedia-spotlight-engines: ./ bundlelist/src/main/bundles/ engines/ engines/dbpedia-spotlight-annotate/ engines/dbpedia-spotlight-candidates/ engines/dbpedia-spotlight-disambiguate/ engines/db...

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/Annotation.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/Annotation.java?rev=1376912&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/Annotation.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/Annotation.java Fri Aug 24 13:48:52 2012
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlight.model;
+
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.utils.XMLParser.getElementsByTagName;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+
+/**
+ * Contains a result given by DBPedia Spotlight.
+ * 
+ * 
+ * @author <a href="mailto:iavor.jelev@babelmonkeys.com">Iavor Jelev</a>
+ */
+public class Annotation {
+
+	/*
+	 * TODO (Note by rwesten 2012-08-22) 
+	 * 
+	 * Added here functionality to extract DBpedia
+	 * Ontology types for Annotations. This is mainly to
+	 * choose the best dc:type for fise:TextAnnotations
+	 * created for Annotation.
+	 * 
+	 * This is based on the assumption that the most generic
+	 * dbpedia type is always the last one in the returned list.
+	 * 
+	 * In addition "DBpedia:TopicalConcept" is ignored first
+	 * as it seems not to be used by dbpedia.org and second
+	 * because it is always parsed last (even after schema
+	 * and freebase types) and would therefore be considered
+	 * as the most generic dbpedia type.
+	 * 
+	 * I do not like this solution and would like to find
+	 * a better solution for that
+	 */
+	/**
+	 * Allows to add DBpedia Ontology types that should be
+	 * ignored by {@link #getDbpediaTypeNames()}.<p>
+	 * Introduced this to ignore the "TopicalConcept"
+	 * type.
+	 */
+	public static final Set<String> IGNORED_DBP_TYPES;
+	static {
+		Set<String> ignored = new HashSet<String>();
+		ignored.add("DBpedia:TopicalConcept");
+		IGNORED_DBP_TYPES = Collections.unmodifiableSet(ignored);
+	}
+	
+	public Resource uri;
+	//TODO: change this to a list with the parsed types
+	//      Processing of XML results should be done during parsing
+	public String types;
+	public Integer support;
+	//NOTE rwesten: changed this to embed a SurfaceForm so that I
+	//     can reuse code for creating fise:TextAnnotations
+	public SurfaceForm surfaceForm;
+	public Double similarityScore;
+	public Double percentageOfSecondRank;
+
+	public List<String> getTypeNames() {
+		if (types == null) {
+			return Collections.emptyList();
+		}
+		// expand the namespace prefixes so that the returned types are referenceable
+		List<String> typeUris = new ArrayList<String>();
+		for (String prefixed : types.split(",")) {
+			String typeUri = prefixed
+					.replace("DBpedia:", "http://dbpedia.org/ontology/")
+					.replace("Freebase:", "http://www.freebase.com/schema")
+					.replace("Schema:", "http://www.schema.org/");
+			if (typeUri.length() > 0) {
+				typeUris.add(typeUri);
+			}
+		}
+		return typeUris;
+	}
+	
+	/**
+	 * Returns the DBpedia ontology types of this annotation as URIs,
+	 * skipping every type listed in {@link #IGNORED_DBP_TYPES}.
+	 * @return the type URIs or an empty list if none
+	 */
+	public List<String> getDbpediaTypeNames(){
+		if (types == null) {
+			return Collections.emptyList();
+		}
+		List<String> dbpediaTypes = new ArrayList<String>();
+		for (String candidate : types.split(",")) {
+			if (candidate.startsWith("DBpedia:") && !IGNORED_DBP_TYPES.contains(candidate)) {
+				dbpediaTypes.add(candidate.replace("DBpedia:", "http://dbpedia.org/ontology/"));
+			}
+		}
+		return dbpediaTypes;
+	}
+
+	/** Formats all fields for debugging; the Double scores use %s because %d only accepts integral types. */
+	@Override
+	public String toString() {
+		return String.format("[uri=%s, support=%d, types=%s, surfaceForm=\"%s\", similarityScore=%s, percentageOfSecondRank=%s]",
+				uri, support, types, surfaceForm, similarityScore, percentageOfSecondRank);
+	}
+
+	/**
+	 * Parses all annotations from the parsed XML {@link Document}.
+	 * 
+	 * @param xmlDoc an XML document containing annotations
+	 * @return a collection with all parsed annotations
+	 * @throws NumberFormatException if a numeric attribute is missing or malformed
+	 */
+	public static Collection<Annotation> parseAnnotations(Document xmlDoc) {
+		NodeList nList = getElementsByTagName(xmlDoc, "Resource");
+		Collection<Annotation> dbpslAnnos = new HashSet<Annotation>();
+
+		for (int temp = 0; temp < nList.getLength(); temp++) {
+			Annotation dbpslann = new Annotation();
+			Element node = (Element) nList.item(temp);
+			dbpslann.uri = new UriRef(node.getAttribute("URI"));
+			dbpslann.support = Integer.valueOf(
+					node.getAttribute("support"));
+			dbpslann.types = node.getAttribute("types");
+			dbpslann.surfaceForm = new SurfaceForm();
+			dbpslann.surfaceForm.name = node.getAttribute("surfaceForm");
+			dbpslann.surfaceForm.offset = Integer.valueOf(
+					node.getAttribute("offset"));
+			//set the type of the surface form
+			List<String> dbpediaTypes = dbpslann.getDbpediaTypeNames();
+			if(!dbpediaTypes.isEmpty()){
+				//set the last type in the list - the most general one - as type
+				//for the surface form
+				dbpslann.surfaceForm.type = dbpediaTypes.get(dbpediaTypes.size()-1);
+			}
+			dbpslann.similarityScore = Double.valueOf(
+					node.getAttribute("similarityScore"));
+			dbpslann.percentageOfSecondRank = Double.valueOf(
+					node.getAttribute("percentageOfSecondRank"));
+
+			dbpslAnnos.add(dbpslann);
+		}
+
+		return dbpslAnnos;
+	}
+
+}

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/CandidateResource.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/CandidateResource.java?rev=1376912&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/CandidateResource.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/CandidateResource.java Fri Aug 24 13:48:52 2012
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlight.model;
+
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.utils.XMLParser.getElementsByTagName;
+
+import java.util.Collection;
+import java.util.HashSet;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+
+//import org.apache.clerezza.rdf.core.Resource;
+
+/**
+ * Stores the candidate resources given by DBPedia Spotlight Candidates.
+ * 
+ * @author <a href="mailto:iavor.jelev@babelmonkeys.com">Iavor Jelev</a>
+ */
+public class CandidateResource {
+
+	public String label;
+	public String uri;
+	public double contextualScore;
+	public double percentageOfSecondRank;
+	public double support;
+	public double priorScore;
+	public double finalScore;
+
+	/** Formats all fields for debugging; the double scores use %s because %d only accepts integral types. */
+	@Override
+	public String toString() {
+		return String.format(
+				"[label=%s, uri=%s, contextualScore=%s, percentageOfSecondRank=%s, support=%s, priorScore=%s, finalScore=%s]",
+				label, uri, contextualScore, percentageOfSecondRank, support, priorScore, finalScore);
+	}
+	
+	/**
+	 * Parses the surface forms and, for each of them, the candidate resources
+	 * from the parsed XML document.
+	 * Every child element of a <code>surfaceForm</code> element is read as
+	 * one {@link CandidateResource} and added to {@link SurfaceForm#resources}.
+	 * 
+	 * @param xmlDoc
+	 *            the XML document containing the <code>surfaceForm</code> elements
+	 * @return a collection with all parsed surface forms
+	 * @throws NumberFormatException
+	 *             if a score or support attribute is missing or not a number
+	 */
+	public static Collection<SurfaceForm> parseCandidates(Document xmlDoc) {
+		NodeList nList = getElementsByTagName(xmlDoc,"surfaceForm");
+		Collection<SurfaceForm> dbpslAnnos = new HashSet<SurfaceForm>();
+
+		for (int temp = 0; temp < nList.getLength(); temp++) {
+			Element node = (Element) nList.item(temp);
+			SurfaceForm dbpslann = SurfaceForm.parseSerfaceForm(node);
+
+			NodeList resources = node.getChildNodes();
+
+			for (int count = 0; count < resources.getLength(); count++) {
+				Node n = resources.item(count);
+				// skip text and comment nodes between the child elements
+				if (!(n instanceof Element)) {
+					continue;
+				}
+				Element r = (Element) n;
+				CandidateResource resource = new CandidateResource();
+				resource.label = r.getAttribute("label");
+				resource.uri = r.getAttribute("uri");
+				// the fields are primitives: parse directly instead of
+				// going through the deprecated Double(String) constructor
+				resource.contextualScore = Double.parseDouble(
+						r.getAttribute("contextualScore"));
+				resource.percentageOfSecondRank = Double.parseDouble(
+						r.getAttribute("percentageOfSecondRank"));
+				resource.support = Double.parseDouble(r.getAttribute("support"));
+				resource.priorScore = Double.parseDouble(r.getAttribute("priorScore"));
+				resource.finalScore = Double.parseDouble(r.getAttribute("finalScore"));
+				dbpslann.resources.add(resource);
+			}
+
+			dbpslAnnos.add(dbpslann);
+		}
+
+		return dbpslAnnos;
+	}
+}

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/SurfaceForm.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/SurfaceForm.java?rev=1376912&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/SurfaceForm.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/model/SurfaceForm.java Fri Aug 24 13:48:52 2012
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlight.model;
+
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.utils.XMLParser.getElementsByTagName;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+
+
+//import org.apache.clerezza.rdf.core.Resource;
+
+/**
+ * Stores the surface forms given by DBPedia Spotlight Candidates.
+ * 
+ * @author <a href="mailto:iavor.jelev@babelmonkeys.com">Iavor Jelev</a>
+ */
+public class SurfaceForm {
+
+	public String name;
+	public String type;
+	public Integer offset;
+	public List<CandidateResource> resources = new ArrayList<CandidateResource>();
+
+	@Override
+	public String toString() {
+		return String.format("[name=%s, offset=%d, type=%s]", name, offset, type); // offset is an Integer: %d (Java has no %i)
+	}
+	
+	/**
+	 * Reads every <code>surfaceForm</code> element of the parsed XML
+	 * document and converts it to a {@link SurfaceForm}.
+	 * 
+	 * @param xmlDoc
+	 *            the XML document containing the surface forms
+	 * @return a collection with all parsed surface forms
+	 */
+	public static Collection<SurfaceForm> parseSurfaceForm(Document xmlDoc) {
+		NodeList elements = getElementsByTagName(xmlDoc,"surfaceForm");
+		Collection<SurfaceForm> surfaceForms = new HashSet<SurfaceForm>();
+
+		for (int i = 0; i < elements.getLength(); i++) {
+			// nodes selected by tag name are expected to be elements
+			Element current = (Element) elements.item(i);
+			surfaceForms.add(parseSerfaceForm(current));
+		}
+
+		return surfaceForms;
+	}
+
+	protected static SurfaceForm parseSerfaceForm(Element node) {
+		// NOTE: the misspelled method name is kept because other classes call it
+		SurfaceForm dbpslann = new SurfaceForm();
+		dbpslann.name = node.getAttribute("name");
+		dbpslann.offset = Integer.valueOf(node.getAttribute("offset"));
+		dbpslann.type = node.getAttribute("type");
+		return dbpslann;
+	}
+	
+	
+}

Copied: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java (from r1376420, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java)
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java?p2=incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java&p1=incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java&r1=1376420&r2=1376912&rev=1376912&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java Fri Aug 24 13:48:52 2012
@@ -16,14 +16,11 @@
  */
 package org.apache.stanbol.enhancer.engines.dbpspotlight.spot;
 
-import static org.apache.stanbol.enhancer.engines.dbpspotlight.spot.XMLParser.getElementsByTagName;
-import static org.apache.stanbol.enhancer.engines.dbpspotlight.spot.XMLParser.loadXMLFromInputStream;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_SPOTTER;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_URL_KEY;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.UTF8;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.utils.XMLParser.loadXMLFromInputStream;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_CONTEXT;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
 
 import java.io.BufferedWriter;
 import java.io.ByteArrayOutputStream;
@@ -32,27 +29,17 @@ import java.io.InputStream;
 import java.io.OutputStreamWriter;
 import java.io.UnsupportedEncodingException;
 import java.net.HttpURLConnection;
-import java.net.MalformedURLException;
 import java.net.URL;
 import java.net.URLEncoder;
-import java.nio.charset.Charset;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Dictionary;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
 
 import org.apache.clerezza.rdf.core.Language;
-import org.apache.clerezza.rdf.core.Literal;
-import org.apache.clerezza.rdf.core.LiteralFactory;
 import org.apache.clerezza.rdf.core.MGraph;
-import org.apache.clerezza.rdf.core.Resource;
 import org.apache.clerezza.rdf.core.UriRef;
-import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
 import org.apache.clerezza.rdf.core.impl.TripleImpl;
 import org.apache.clerezza.rdf.core.serializedform.Serializer;
 import org.apache.commons.io.IOUtils;
@@ -63,22 +50,18 @@ import org.apache.felix.scr.annotations.
 import org.apache.felix.scr.annotations.Service;
 import org.apache.stanbol.commons.stanboltools.offline.OfflineMode;
 import org.apache.stanbol.commons.stanboltools.offline.OnlineMode;
-import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.model.SurfaceForm;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.utils.SpotlightEngineUtils;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
-import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
-import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
-import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
 import org.osgi.service.cm.ConfigurationException;
 import org.osgi.service.component.ComponentContext;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.NodeList;
 import org.xml.sax.SAXException;
 
 /**
@@ -87,17 +70,19 @@ import org.xml.sax.SAXException;
  * 
  * @author Iavor Jelev, Babelmonkeys (GzEvD)
  */
-@Component(metatype = true, immediate = true, label = "%stanbol.DBPSpotlightSpotEnhancementEngine.name", description = "%stanbol.DBPSpotlightSpotEnhancementEngine.description")
+@Component(metatype = true, immediate = true, 
+	label = "%stanbol.DBPSpotlightSpotEnhancementEngine.name", 
+	description = "%stanbol.DBPSpotlightSpotEnhancementEngine.description")
 @Service
 @Properties(value = { 
-		@Property(name = EnhancementEngine.PROPERTY_NAME, value = "dbpspotlightspot") 
+		@Property(name = EnhancementEngine.PROPERTY_NAME, value = "dbpspotlightspot"),
+		@Property(name = PARAM_URL_KEY, value = "http://spotlight.dbpedia.org/rest/spot"),
+		@Property(name = PARAM_SPOTTER)
 })
 public class DBPSpotlightSpotEnhancementEngine extends
 		AbstractEnhancementEngine<IOException, RuntimeException> implements
 		EnhancementEngine, ServiceProperties {
 
-	private static final Charset UTF8 = Charset.forName("UTF-8");
-	
 	/**
 	 * Ensures this engine is deactivated in {@link OfflineMode}
 	 */
@@ -106,38 +91,11 @@ public class DBPSpotlightSpotEnhancement
 	private OnlineMode onlineMode;
 	
 	/**
-	 * a configurable value of the text segment length to check
-	 */
-	@Property(value = "http://spotlight.dbpedia.org/rest/spot")
-	public static final String SL_URL_KEY = "stanbol.DBPSpotlightSpotEnhancementEngine.url";
-
-	@Property(value = "LingPipeSpotter")
-	public static final String SL_SPOTTER = "stanbol.DBPSpotlightSpotEnhancementEngine.spotter";
-
-	/**
 	 * The default value for the Execution of this Engine. Currently set to
 	 * <code>{@link ServiceProperties#ORDERING_CONTENT_EXTRACTION} - 29</code>
 	 */
 	public static final Integer defaultOrder = ORDERING_CONTENT_EXTRACTION - 29;
 
-	/**
-	 * This contains the only MIME type directly supported by this enhancement
-	 * engine.
-	 */
-	private static final String TEXT_PLAIN_MIMETYPE = "text/plain";
-	/**
-	 * Set containing the only supported mime type {@link #TEXT_PLAIN_MIMETYPE}
-	 */
-	private static final Set<String> SUPPORTED_MIMTYPES = Collections
-			.singleton(TEXT_PLAIN_MIMETYPE);
-
-	/**
-	 * This contains a list of languages supported by DBpedia Spotlight. If the
-	 * metadata doesn't contain a value for the language as the value of the
-	 * {@link Property.DC_LANG property} the content can't be processed.
-	 */
-	protected static final Set<String> SUPPORTED_LANGUAGES = Collections
-			.unmodifiableSet(new HashSet<String>(Arrays.asList("en")));
 
 	/** holds the logger. */
 	private static final Logger log = LoggerFactory
@@ -172,24 +130,12 @@ public class DBPSpotlightSpotEnhancement
 		super.activate(ce);
 
 		Dictionary<String, Object> properties = ce.getProperties();
-		Object value = properties.get(SL_URL_KEY);
-		if(value == null || value.toString().isEmpty()){
-			throw new ConfigurationException(SL_URL_KEY, "The URL with the DBpedia "
-					+ "Spotlight Spot RESTful Service MUST NOT be NULL nor empty!");
-		} else {
-			String url = (String) properties.get(SL_URL_KEY);
-			try {
-				this.spotlightUrl = new URL(url);
-			} catch (MalformedURLException e) {
-				throw new ConfigurationException(SL_URL_KEY, "The parsed URL for the "
-						+ "DBpedia Spotlight Spot RESTful Service is illegal formatted!",
-						e);
-			}
-		}
+		spotlightUrl = SpotlightEngineUtils.parseSpotlightServiceURL(properties);
+
 		//also set the spotter to null if an empty string is parsed
-		value = properties.get(SL_SPOTTER);
-		spotlightSpotter = value != null && !value.toString().isEmpty() ?
-				value.toString() : null;
+		Object spotterConfig = properties.get(PARAM_SPOTTER);
+		spotlightSpotter = spotterConfig != null && !spotterConfig.toString().isEmpty() ?
+				spotterConfig.toString() : null;
 	}
 
 	/**
@@ -199,23 +145,8 @@ public class DBPSpotlightSpotEnhancement
 	 *            the {@link ContentItem}
 	 */
 	public int canEnhance(ContentItem ci) throws EngineException {
-		if (ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) != null) {
-			String language = EnhancementEngineHelper.getLanguage(ci);
-			if (!SUPPORTED_LANGUAGES.contains(language)) {
-				log.info("DBpedia Spotlight can not process ContentItem {} "
-						+ "because language {} is not supported (supported: {})",
-						new Object[] { ci.getUri(), language, SUPPORTED_LANGUAGES });
-				return CANNOT_ENHANCE;
-			}
-			//rwesten: ASYNC support is highly recommended for engines that
-			//         do call remote services
-			return ENHANCE_ASYNC;
-		} else {
-			log.info("DBpedia Spotlight can not process ContentItem {} "
-					+ "because it does not have 'plain/text' content",
-					ci.getUri());
-			return CANNOT_ENHANCE;
-		}
+		return SpotlightEngineUtils.canProcess(ci) ?
+				ENHANCE_ASYNC : CANNOT_ENHANCE;
 	}
 
 	/**
@@ -226,35 +157,8 @@ public class DBPSpotlightSpotEnhancement
 	 *            the {@link ContentItem}
 	 */
 	public void computeEnhancements(ContentItem ci) throws EngineException {
-		Language language;
-		String lang = EnhancementEngineHelper.getLanguage(ci);
-		if(!SUPPORTED_LANGUAGES.contains(lang)){
-			throw new IllegalStateException("Langage '"+lang
-					+ "' as annotated for ContentItem "
-				    + ci.getUri() + " is not supported by this Engine: "
-				    + "This is also checked in the canEnhance method! -> This "
-					+ "indicated an Bug in the implementation of the "
-					+ "EnhancementJobManager!");
-		} else {
-			language = lang == null || lang.isEmpty() ? null : new Language(lang);
-		}
-		Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(ci,
-				SUPPORTED_MIMTYPES);
-		if (contentPart == null) {
-			throw new IllegalStateException("No ContentPart with Mimetype '"
-					+ TEXT_PLAIN_MIMETYPE
-					+ "' found for ContentItem "
-					+ ci.getUri()
-					+ ": This is also checked in the canEnhance method! -> This "
-					+ "indicated an Bug in the implementation of the "
-					+ "EnhancementJobManager!");
-		}
-		String text = "";
-		try {
-			text = ContentItemHelper.getText(contentPart.getValue());
-		} catch (IOException e) {
-			throw new InvalidContentException(this, ci, e);
-		}
+		Language language = SpotlightEngineUtils.getContentLanguage(ci);
+		String text = SpotlightEngineUtils.getPlainContent(ci);
 
 		Collection<SurfaceForm> dbpslGraph = doPostRequest(text,ci.getUri());
 		if (dbpslGraph != null) {
@@ -292,27 +196,13 @@ public class DBPSpotlightSpotEnhancement
 	 */
 	protected void createEnhancements(Collection<SurfaceForm> occs,
 			ContentItem ci,  String content, Language lang) {
-		LiteralFactory literalFactory = LiteralFactory.getInstance();
 
 		HashMap<String, UriRef> entityAnnotationMap = new HashMap<String, UriRef>();
 
 		MGraph model = ci.getMetadata();
 		for (SurfaceForm occ : occs) {
-			UriRef textAnnotation = EnhancementEngineHelper
-					.createTextEnhancement(ci, this);
-			model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,
-					new PlainLiteralImpl(occ.name, lang)));
-			model.add(new TripleImpl(textAnnotation, ENHANCER_START,
-					literalFactory.createTypedLiteral(occ.offset)));
-			model.add(new TripleImpl(textAnnotation, ENHANCER_END,
-					literalFactory.createTypedLiteral(occ.offset
-							+ occ.name.length())));
-			model.add(new TripleImpl(textAnnotation, DC_TYPE, new UriRef(
-					occ.type)));
-			model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, 
-					new PlainLiteralImpl(
-							getSelectionContext(content, occ.name, occ.offset),
-							lang)));
+			UriRef textAnnotation = SpotlightEngineUtils.createTextEnhancement(
+					occ, this, ci, content, lang);
 			if (entityAnnotationMap.containsKey(occ.name)) {
 				model.add(new TripleImpl(entityAnnotationMap.get(occ.name),
 						DC_RELATION, textAnnotation));
@@ -322,6 +212,7 @@ public class DBPSpotlightSpotEnhancement
 		}
 	}
 
+
 	/**
 	 * Sends a POST request to the DBpediaSpotlight url.
 	 * 
@@ -407,93 +298,13 @@ public class DBPSpotlightSpotEnhancement
 		}
 		//rwesten: commented the disconnect to allow keep-alive
 		//connection.disconnect();
-		NodeList nlist = getElementsByTagName(xmlDoc,"surfaceForm");
-	    return getAnnotations(nlist);
+	    return SurfaceForm.parseSurfaceForm(xmlDoc);
 	}
 
-	/**
-	 * This method creates the Collection of surface forms, which the method
-	 * <code>createEnhancement</code> adds to the meta data of the content item
-	 * as TextAnnotations.
-	 * 
-	 * @param nList
-	 *            NodeList of all Resources contained in the XML response from
-	 *            DBpedia Spotlight
-	 * @return a Collection<DBPSLSurfaceForm> with all annotations
-	 */
-	private Collection<SurfaceForm> getAnnotations(NodeList nList) {
-		Collection<SurfaceForm> dbpslAnnos = new HashSet<SurfaceForm>();
-
-		for (int temp = 0; temp < nList.getLength(); temp++) {
-			SurfaceForm dbpslann = new SurfaceForm();
-			Element node = (Element) nList.item(temp);
-			dbpslann.name = node.getAttribute("name");
-			dbpslann.offset = (new Integer(node.getAttribute("offset")))
-					.intValue();
-			dbpslann.type = node.getAttribute("type");
 
-			dbpslAnnos.add(dbpslann);
-		}
-
-		return dbpslAnnos;
-	}
 
 	public Map<String, Object> getServiceProperties() {
 		return Collections.unmodifiableMap(Collections.singletonMap(
 				ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder));
 	}
-// rwesten: Use the Utility provided by the EnhancementEngineHelper instead
-//	public String getMetadataLanguage(MGraph model, NonLiteral subj) {
-//		Iterator<Triple> it = model.filter(subj, DC_LANGUAGE, null);
-//		if (it.hasNext()) {
-//			Resource langNode = it.next().getObject();
-//			return getLexicalForm(langNode);
-//		}
-//		return null;
-//	}
-
-// rwesten: unused
-//	public String getLexicalForm(Resource res) {
-//		if (res == null) {
-//			return null;
-//		} else if (res instanceof Literal) {
-//			return ((Literal) res).getLexicalForm();
-//		} else {
-//			return res.toString();
-//		}
-//	}
-	
-    private static final int DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE = 50;
-    /**
-     * Extracts the selection context based on the content, selection and
-     * the start char offset of the selection
-     * @param content the content
-     * @param selection the selected text
-     * @param selectionStartPos the start char position of the selection
-     * @return the context
-     */
-    protected static String getSelectionContext(String content, String selection,int selectionStartPos){
-        //extract the selection context
-        int beginPos;
-        if(selectionStartPos <= DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE){
-            beginPos = 0;
-        } else {
-            int start = selectionStartPos-DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
-            beginPos = content.indexOf(' ',start);
-            if(beginPos < 0 || beginPos >= selectionStartPos){ //no words
-                beginPos = start; //begin within a word
-            }
-        }
-        int endPos;
-        if(selectionStartPos+selection.length()+DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE >= content.length()){
-            endPos = content.length();
-        } else {
-            int start = selectionStartPos+selection.length()+DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
-            endPos = content.lastIndexOf(' ', start);
-            if(endPos <= selectionStartPos+selection.length()){
-                endPos = start; //end within a word;
-            }
-        }
-        return content.substring(beginPos, endPos);
-    }
 }

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/utils/SpotlightEngineUtils.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/utils/SpotlightEngineUtils.java?rev=1376912&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/utils/SpotlightEngineUtils.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/utils/SpotlightEngineUtils.java Fri Aug 24 13:48:52 2012
@@ -0,0 +1,285 @@
+package org.apache.stanbol.enhancer.engines.dbpspotlight.utils;
+
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PARAM_URL_KEY;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PROPERTY_CONTEXTUAL_SCORE;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PROPERTY_FINAL_SCORE;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PROPERTY_PERCENTAGE_OF_SECOND_RANK;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PROPERTY_PRIOR_SCORE;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PROPERTY_SIMILARITY_SCORE;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.PROPERTY_SUPPORT;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.SUPPORTED_LANGUAGES;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.SUPPORTED_MIMTYPES;
+import static org.apache.stanbol.enhancer.engines.dbpspotlight.Constants.TEXT_PLAIN_MIMETYPE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_LABEL;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_TYPE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_CONTEXT;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Dictionary;
+import java.util.Map.Entry;
+
+import org.apache.clerezza.rdf.core.Language;
+import org.apache.clerezza.rdf.core.Literal;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.Constants;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.model.Annotation;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.model.CandidateResource;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.model.SurfaceForm;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.osgi.service.cm.ConfigurationException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Shared utilities for the Spotlight Enhancement Engines.
+ */
+public class SpotlightEngineUtils {
+
+	/** Logger used by the static helpers of this class. */
+	private static final Logger log = LoggerFactory.getLogger(SpotlightEngineUtils.class);
+	
+	/** Factory used to create the typed literals added to the metadata of ContentItems. */
+	private static final LiteralFactory literalFactory = LiteralFactory.getInstance();
+	
+    /**
+     * Number of chars taken before and after a selection by
+     * {@link #getSelectionContext(String, String, int)} before the bounds
+     * are moved to a space char.
+     */
+    private static final int DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE = 50;
+    
+    /**
+     * Checks if the parsed {@link ContentItem} can be processed by the
+     * DBpedia Spotlight engines. This requires that
+     * <code>ContentItemHelper.getBlob(..)</code> finds a content part for
+     * the supported media types and that the language returned by
+     * <code>EnhancementEngineHelper.getLanguage(..)</code> is contained in
+     * the supported languages. The reason for a rejection is logged on
+     * INFO level.
+     * @param ci the ContentItem to check
+     * @return <code>true</code> if the content item can be processed,
+     * otherwise <code>false</code>
+     */
+    public static boolean canProcess(ContentItem ci){
+		if (ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) == null) {
+			//the media type is 'text/plain' (was wrongly logged as 'plain/text')
+			log.info("DBpedia Spotlight can not process ContentItem {} "
+					+ "because it does not have 'text/plain' content",
+					ci.getUri());
+			return false;
+		}
+		String language = EnhancementEngineHelper.getLanguage(ci);
+		if(SUPPORTED_LANGUAGES.contains(language)) {
+			return true;
+		}
+		log.info("DBpedia Spotlight can not process ContentItem {} "
+				+ "because language {} is not supported (supported: {})",
+				new Object[] { ci.getUri(), language, SUPPORTED_LANGUAGES });
+		return false;
+    }
+	/**
+	 * Getter for the {@link Language} of the parsed ContentItem based on
+	 * the language returned by
+	 * <code>EnhancementEngineHelper.getLanguage(..)</code>.
+	 * @param ci the content item
+	 * @return the language or <code>null</code> if the (supported)
+	 * language value is <code>null</code> or empty
+	 * @throws IllegalStateException if the language of the content item is
+	 * not contained in the supported languages
+	 */
+	public static Language getContentLanguage(ContentItem ci) {
+		final String annotated = EnhancementEngineHelper.getLanguage(ci);
+		if(SUPPORTED_LANGUAGES.contains(annotated)){
+			if(annotated == null || annotated.isEmpty()){
+				return null; //no language information available
+			}
+			return new Language(annotated);
+		}
+		//unsupported languages are expected to be filtered before this call
+		String message = "Langage '" + annotated
+				+ "' as annotated for ContentItem "
+				+ ci.getUri() + " is not supported by this Engine: "
+				+ "This is also checked in the canEnhance method! -> This "
+				+ "indicated an Bug in the implementation of the "
+				+ "EnhancementJobManager!";
+		throw new IllegalStateException(message);
+	}
+	/**
+	 * Reads the text of the content part returned by
+	 * <code>ContentItemHelper.getBlob(..)</code> for the supported media
+	 * types of the parsed ContentItem.
+	 * @param ci the content item
+	 * @return the text of the content part
+	 * @throws EngineException if reading the text of the content part
+	 * fails. The {@link IOException} is set as cause.
+	 * @throws IllegalStateException if no content part with a supported
+	 * media type is present
+	 */
+	public static String getPlainContent(ContentItem ci)
+			throws EngineException {
+		Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(ci,
+				SUPPORTED_MIMTYPES);
+		if (contentPart == null) {
+			throw new IllegalStateException(
+					"No ContentPart with Mimetype '"
+							+ TEXT_PLAIN_MIMETYPE
+							+ "' found for ContentItem "
+							+ ci.getUri()
+							+ ": This is also checked in the canEnhance method! -> This "
+							+ "indicated an Bug in the implementation of the "
+							+ "EnhancementJobManager!");
+		}
+		try {
+			return ContentItemHelper.getText(contentPart.getValue());
+		} catch (IOException e) {
+			//keep the IOException as cause so that the reason is not lost;
+			//the message previously read "...content formcontentpart ..."
+			throw new EngineException("Unable to read plain text content from "
+					+ "content part " + contentPart.getKey() + " of ContentItem "
+					+ ci.getUri(), e);
+		}
+	}
+	/**
+	 * Parses the URL from the {@link Constants#PARAM_URL_KEY}
+	 * @param properties the configuration of the engine
+	 * @return the URL of the service
+	 * @throws ConfigurationException if the configuration is missing,
+	 * empty or not a valid URL
+	 */
+	public static URL parseSpotlightServiceURL(
+			Dictionary<String, Object> properties)
+			throws ConfigurationException {
+		Object value = properties.get(PARAM_URL_KEY);
+		if(value == null || value.toString().isEmpty()){
+			throw new ConfigurationException(PARAM_URL_KEY, "The URL with the DBpedia "
+					+ "Spotlight Annotate RESTful Service MUST NOT be NULL nor empty!");
+		} else {
+			try {
+				return new URL(value.toString());
+			} catch (MalformedURLException e) {
+				throw new ConfigurationException(PARAM_URL_KEY, "The parsed URL for the "
+						+ "DBpedia Spotlight Annotate RESTful Service is illegal formatted!",
+						e);
+			}
+		}
+	}
+	/**
+     * Cuts the text surrounding a selection out of the content. The context
+     * covers up to {@link #DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE}
+     * chars before and after the selection. Where possible the bounds are
+     * moved to a space char (the first one within the prefix, the last one
+     * within the suffix), otherwise the context begins/ends within a word.
+     * @param content the content
+     * @param selection the selected text
+     * @param selectionStartPos the start char position of the selection
+     * within the content
+     * @return the selection including its context
+     */
+    public static String getSelectionContext(String content, String selection,int selectionStartPos){
+        final int contextSize = DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
+        //lower bound: start of the content or a space within the prefix
+        int contextStart = 0;
+        if(selectionStartPos > contextSize){
+            int windowStart = selectionStartPos - contextSize;
+            int firstSpace = content.indexOf(' ', windowStart);
+            boolean spaceInPrefix = firstSpace >= 0 && firstSpace < selectionStartPos;
+            contextStart = spaceInPrefix ? firstSpace : windowStart;
+        }
+        //upper bound: end of the content or a space within the suffix
+        final int selectionEnd = selectionStartPos + selection.length();
+        int contextEnd = content.length();
+        if(selectionEnd + contextSize < content.length()){
+            int windowEnd = selectionEnd + contextSize;
+            int lastSpace = content.lastIndexOf(' ', windowEnd);
+            contextEnd = lastSpace > selectionEnd ? lastSpace : windowEnd;
+        }
+        return content.substring(contextStart, contextEnd);
+    }
+    /**
+     * Creates a fise:TextAnnotation for the parsed surface form and adds
+     * it to the {@link ContentItem#getMetadata()}. Written are the selected
+     * text, the start/end char offsets, the dc:type (only if the surface
+     * form defines a non-empty type) and the selection context.<p>
+     * This method assumes a write lock on the parsed content item.
+     * @param occ the SurfaceForm
+     * @param engine the Engine
+     * @param ci the ContentItem
+     * @param content the content
+     * @param lang the language of the content or <code>null</code>
+     * @return the URI of the created fise:TextAnnotation
+     */
+	public static UriRef createTextEnhancement(SurfaceForm occ,
+			EnhancementEngine engine, ContentItem ci, String content,
+			Language lang) {
+		final MGraph metadata = ci.getMetadata();
+		final UriRef annotationUri = EnhancementEngineHelper
+				.createTextEnhancement(ci, engine);
+		//the selected text and its position within the content
+		Literal selectedText = new PlainLiteralImpl(occ.name, lang);
+		metadata.add(new TripleImpl(annotationUri, ENHANCER_SELECTED_TEXT,
+				selectedText));
+		metadata.add(new TripleImpl(annotationUri, ENHANCER_START,
+				literalFactory.createTypedLiteral(occ.offset)));
+		//the end offset is exclusive: start + length of the selection
+		metadata.add(new TripleImpl(annotationUri, ENHANCER_END,
+				literalFactory.createTypedLiteral(occ.offset
+						+ occ.name.length())));
+		final boolean hasType = occ.type != null && !occ.type.isEmpty();
+		if(hasType){
+			metadata.add(new TripleImpl(annotationUri, DC_TYPE,
+					new UriRef(occ.type)));
+		}
+		Literal selectionContext = new PlainLiteralImpl(
+				getSelectionContext(content, occ.name, occ.offset), lang);
+		metadata.add(new TripleImpl(annotationUri, ENHANCER_SELECTION_CONTEXT,
+				selectionContext));
+		return annotationUri;
+	}
+	/**
+	 * Creates a fise:EntityAnnotation for the parsed candidate resource and
+	 * adds it to the {@link ContentItem#getMetadata()}. In addition to the
+	 * label and the reference of the entity the Spotlight specific scores
+	 * (contextual score, percentage of second rank, support, prior score
+	 * and final score) are written. The label is always created with the
+	 * language "en".<p>
+	 * This method assumes a write lock on the parsed content item.
+	 * @param resource the candidate resource
+	 * @param engine the engine
+	 * @param ci the content item
+	 * @param textAnnotation the fise:TextAnnotation to dc:relate the
+	 * created fise:EntityAnnotation
+	 * @return the URI of the created fise:EntityAnnotation
+	 */
+	public static UriRef createEntityAnnotation(CandidateResource resource,
+			EnhancementEngine engine, ContentItem ci, UriRef textAnnotation) {
+		final UriRef annotationUri = EnhancementEngineHelper
+				.createEntityEnhancement(ci, engine);
+		final MGraph metadata = ci.getMetadata();
+		final Literal entityLabel = new PlainLiteralImpl(resource.label,
+				new Language("en"));
+		//link with the fise:TextAnnotation and describe the entity
+		metadata.add(new TripleImpl(annotationUri, DC_RELATION,
+				textAnnotation));
+		metadata.add(new TripleImpl(annotationUri, ENHANCER_ENTITY_LABEL,
+				entityLabel));
+		metadata.add(new TripleImpl(annotationUri, ENHANCER_ENTITY_REFERENCE,
+				new UriRef(resource.uri)));
+		//add spotlight specific information
+		metadata.add(new TripleImpl(annotationUri, PROPERTY_CONTEXTUAL_SCORE,
+				literalFactory.createTypedLiteral(resource.contextualScore)));
+		metadata.add(new TripleImpl(annotationUri, PROPERTY_PERCENTAGE_OF_SECOND_RANK,
+				literalFactory.createTypedLiteral(resource.percentageOfSecondRank)));
+		metadata.add(new TripleImpl(annotationUri, PROPERTY_SUPPORT,
+				literalFactory.createTypedLiteral(resource.support)));
+		metadata.add(new TripleImpl(annotationUri, PROPERTY_PRIOR_SCORE,
+				literalFactory.createTypedLiteral(resource.priorScore)));
+		metadata.add(new TripleImpl(annotationUri, PROPERTY_FINAL_SCORE,
+				literalFactory.createTypedLiteral(resource.finalScore)));
+		return annotationUri;
+	}
+	/**
+	 * Creates a fise:EntityAnnotation for the parsed annotation and adds it
+	 * to the {@link ContentItem#getMetadata()}. The name of the surface
+	 * form is used as label of the entity and the similarity score is
+	 * written both as fise:confidence and as Spotlight similarity score.<p>
+	 * This method assumes a write lock on the parsed content item.
+	 * @param annotation the Annotation
+	 * @param engine the engine
+	 * @param ci the content item
+	 * @param textAnnotation the TextAnnotation the created
+	 * EntityAnnotation links by using dc:relation
+	 * @param language the language of the label of the referenced
+	 * Entity (or <code>null</code> if none).
+	 */
+	public static void createEntityAnnotation(Annotation annotation, 
+			EnhancementEngine engine, ContentItem ci,
+			UriRef textAnnotation, Language language) {
+		final MGraph metadata = ci.getMetadata();
+		final UriRef annotationUri = EnhancementEngineHelper
+				.createEntityEnhancement(ci, engine);
+		final Literal entityLabel = new PlainLiteralImpl(
+				annotation.surfaceForm.name, language);
+		//link with the fise:TextAnnotation and describe the entity
+		metadata.add(new TripleImpl(annotationUri, DC_RELATION,
+				textAnnotation));
+		metadata.add(new TripleImpl(annotationUri, ENHANCER_ENTITY_LABEL,
+				entityLabel));
+		metadata.add(new TripleImpl(annotationUri, ENHANCER_ENTITY_REFERENCE,
+				annotation.uri));
+		//one fise:entity-type value for each type name of the annotation
+		for(String typeName : annotation.getTypeNames()){
+			metadata.add(new TripleImpl(annotationUri, ENHANCER_ENTITY_TYPE,
+					new UriRef(typeName)));
+		}
+		//TODO (rwesten): Please check: the similarityScore is used as fise:confidence value
+		metadata.add(new TripleImpl(annotationUri, ENHANCER_CONFIDENCE,
+				literalFactory.createTypedLiteral(annotation.similarityScore)));
+		//add spotlight specific information
+		metadata.add(new TripleImpl(annotationUri, PROPERTY_PERCENTAGE_OF_SECOND_RANK,
+				literalFactory.createTypedLiteral(annotation.percentageOfSecondRank)));
+		metadata.add(new TripleImpl(annotationUri, PROPERTY_SUPPORT,
+				literalFactory.createTypedLiteral(annotation.support)));
+		metadata.add(new TripleImpl(annotationUri, PROPERTY_SIMILARITY_SCORE,
+				literalFactory.createTypedLiteral(annotation.similarityScore)));
+	}
+
+}

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/utils/XMLParser.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/utils/XMLParser.java?rev=1376912&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/utils/XMLParser.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/utils/XMLParser.java Fri Aug 24 13:48:52 2012
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlight.utils;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StringReader;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.NodeList;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+/**
+ * Parses the XML results given by DBPedia Spotlight.
+ * <p>
+ * NOTE(review): the parsers are created with the default JAXP settings, so
+ * DTDs and external entities are not explicitly disabled. Consider hardening
+ * the factory if the XML may come from an untrusted service.
+ * 
+ * @author <a href="mailto:iavor.jelev@babelmonkeys.com">Iavor Jelev</a>
+ */
+public final class XMLParser {
+
+	/**
+	 * Do not create instances of Utility Classes
+	 */
+	private XMLParser(){}
+	
+	/**
+	 * Getter for all elements of the document with the parsed tag name.
+	 * @param doc the document
+	 * @param tagName the name of the tag
+	 * @return the result of {@link Document#getElementsByTagName(String)}
+	 */
+	public static NodeList getElementsByTagName(Document doc, String tagName) {
+
+		return doc.getElementsByTagName(tagName);
+	}
+
+	/**
+	 * Parses the XML document contained in the parsed String by using a
+	 * namespace aware parser.
+	 * @param xml the XML document
+	 * @return the parsed and normalized document
+	 * @throws SAXException if the XML can not be parsed
+	 * @throws IOException on any error while reading the XML
+	 */
+	public static Document loadXMLFromString(String xml) throws SAXException,
+			IOException {
+		//parse the chars directly: converting the String to bytes with the
+		//platform default charset could corrupt non-ASCII content
+		Document doc = newNamespaceAwareBuilder().parse(
+				new InputSource(new StringReader(xml)));
+		doc.getDocumentElement().normalize();
+
+		return doc;
+	}
+
+	/**
+	 * Parses the XML document provided by the parsed stream by using a
+	 * namespace aware parser. The stream is closed by this method, also
+	 * if parsing fails.
+	 * @param is the stream to read the XML document from
+	 * @return the parsed and normalized document
+	 * @throws SAXException if the XML can not be parsed
+	 * @throws IOException on any error while reading from (or closing)
+	 * the stream
+	 * @throws IllegalStateException if no parser can be created
+	 */
+	public static Document loadXMLFromInputStream(InputStream is) throws SAXException,
+			IOException {
+		try {
+			Document doc = newNamespaceAwareBuilder().parse(is);
+			doc.getDocumentElement().normalize();
+
+			return doc;
+		} finally {
+			//do not leak the stream if parsing fails
+			if (is != null) {
+				is.close();
+			}
+		}
+	}
+
+	/**
+	 * Parses the XML document stored in the file with the parsed path.
+	 * Other than the other load methods this one uses the default
+	 * {@link DocumentBuilderFactory} configuration (namespace awareness is
+	 * not activated).
+	 * @param filePath the path to the XML file
+	 * @return the parsed and normalized document
+	 * @throws ParserConfigurationException if no parser can be created
+	 * @throws SAXException if the XML can not be parsed
+	 * @throws IOException on any error while reading the file
+	 */
+	public static Document loadXMLFromFile(String filePath)
+			throws ParserConfigurationException, SAXException, IOException {
+		File fXmlFile = new File(filePath);
+		DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
+		DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
+		Document doc = dBuilder.parse(fXmlFile);
+		doc.getDocumentElement().normalize();
+
+		return doc;
+	}
+
+	/**
+	 * Creates a namespace aware {@link DocumentBuilder}.
+	 * @return the builder (never <code>null</code>)
+	 * @throws IllegalStateException if the builder can not be created.
+	 * The {@link ParserConfigurationException} is set as cause.
+	 */
+	private static DocumentBuilder newNamespaceAwareBuilder() {
+		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+		factory.setNamespaceAware(true);
+		try {
+			return factory.newDocumentBuilder();
+		} catch (ParserConfigurationException ex) {
+			//was silently ignored, which caused a NullPointerException
+			//when the (null) builder was used
+			throw new IllegalStateException(
+					"Unable to create a namespace aware DocumentBuilder", ex);
+		}
+	}
+}
\ No newline at end of file

Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1376912&r1=1376397&r2=1376912&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/resources/OSGI-INF/metatype/metatype.properties (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/resources/OSGI-INF/metatype/metatype.properties Fri Aug 24 13:48:52 2012
@@ -21,22 +21,50 @@
 # descriptions as used in the metatype.xml descriptor generated by the
 # the maven SCR plugin
 
-stanbol.DBPSpotlightAnnotateEnhancementEngine.name = DBpedia Spotlight Annotate: Named Entity Extraction and Ontology Linking
-stanbol.DBPSpotlightAnnotateEnhancementEngine.description = Find names of people, organization, \
- places... disambiguate and link them to DBpedia Ontology URIs. This is a complete EnhancementChain, all in one Engine.
-stanbol.DBPSpotlightAnnotateEnhancementEngine.url.name = Spotlight URL
-stanbol.DBPSpotlightAnnotateEnhancementEngine.url.description = The URL which will be used for the request
-stanbol.DBPSpotlightAnnotateEnhancementEngine.spotter.name = Spotter
-stanbol.DBPSpotlightAnnotateEnhancementEngine.spotter.description = The algorithm which will be used for Spotting \
+# Request Properties (shared by all Engines)
+
+dbpedia.spotlight.url.name = Spotlight URL
+dbpedia.spotlight.url.description = The URL which will be used for the request
+dbpedia.spotlight.spotter.name = Spotter
+dbpedia.spotlight.spotter.description = The algorithm which will be used for Spotting \
     (aka Term Recognition). Currently available: NER, LingPipeSpotter, OpenNLPChunkerSpotter, Kea
-stanbol.DBPSpotlightAnnotateEnhancementEngine.disambiguator.name = Disambiguator
-stanbol.DBPSpotlightAnnotateEnhancementEngine.disambiguator.description = The algorithm used for ranking of senses \
+dbpedia.spotlight.disambiguator.name = Disambiguator
+dbpedia.spotlight.disambiguator.description = The algorithm used for ranking of senses \
      based on context. Currently available: Document, Occurrences
-stanbol.DBPSpotlightAnnotateEnhancementEngine.types.name = Types Restriction
-stanbol.DBPSpotlightAnnotateEnhancementEngine.types.description = The DBpedia Ontology types you wish to restrict your results to
-stanbol.DBPSpotlightAnnotateEnhancementEngine.sparql.name = Sparql
-stanbol.DBPSpotlightAnnotateEnhancementEngine.sparql.description = Restrict the result with SPARQL
-stanbol.DBPSpotlightAnnotateEnhancementEngine.support.name = Support
-stanbol.DBPSpotlightAnnotateEnhancementEngine.support.description = Filter the results based on a support metric
-stanbol.DBPSpotlightAnnotateEnhancementEngine.confidence.name = Confidence
-stanbol.DBPSpotlightAnnotateEnhancementEngine.confidence.description = Filter the results based on a confidence metric
+dbpedia.spotlight.types.name = Types Restriction
+dbpedia.spotlight.types.description = The DBpedia Ontology types you wish to restrict your results to
+dbpedia.spotlight.sparql.name = Sparql
+dbpedia.spotlight.sparql.description = Restrict the result with SPARQL
+dbpedia.spotlight.support.name = Support
+dbpedia.spotlight.support.description = Filter the results based on a support metric
+dbpedia.spotlight.confidence.name = Confidence
+dbpedia.spotlight.confidence.description = Filter the results based on a confidence metric
+
+
+#Annotate
+
+dbpedia.spotlight.name = DBpedia Spotlight Annotate: Named Entity Extraction and Ontology Linking
+dbpedia.spotlight.description = Find names of people, organization, \
+ places... disambiguate and link them to DBpedia Ontology URIs. This is a complete EnhancementChain, all in one Engine.
+
+
+# SPOT
+
+stanbol.DBPSpotlightSpotEnhancementEngine.name = DBpedia Spotlight Spotter: Named Entity Recognition
+stanbol.DBPSpotlightSpotEnhancementEngine.description = This engine performs just Named Entity Recognition, \
+    so it is suited for EnhancementChain scenario, in which another Engine links the recognized TextAnnotations \
+    to Ontology Types
+
+# Candidates
+
+stanbol.DBPSpotlightCandidatesEnhancementEngine.name = DBpedia Spotlight Candidates: Named Entity Extraction and Ontology Linking
+stanbol.DBPSpotlightCandidatesEnhancementEngine.description = Find names of people, organization, \
+ places... disambiguate and link them to DBpedia Ontology URIs. The difference to the DBPSpotlightAnnotateEnhancementEngine is that  \
+ all candidate URIs for a given TextAnnotation are delivered, as opposed to just the top K 
+
+
+#Disambiguate
+
+stanbol.DBPSpotlightDisambiguateEnhancementEngine.name = DBpedia Spotlight Disambiguate: Disambiguation and Ontology Linking
+stanbol.DBPSpotlightDisambiguateEnhancementEngine.description = It uses TextAnnotations added by a Spotter, so it can only be used \
+ in an EnhancementChain context. It disambiguates and links them to DBpedia Ontology URIs. 

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-dbpspotlight.config
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-dbpspotlight.config?rev=1376912&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-dbpspotlight.config (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/main/resources/config/org.apache.stanbol.enhancer.chain.weighted.impl.WeightedChain-dbpspotlight.config Fri Aug 24 13:48:52 2012
@@ -0,0 +1,2 @@
+stanbol.enhancer.chain.name="dbpedia-spotlight"
+stanbol.enhancer.chain.weighted.chain=["tika;optional","metaxa;optional","langdetect","dbpspotlightannotate"]

Copied: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementTest.java (from r1376420, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementTest.java)
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementTest.java?p2=incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementTest.java&p1=incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementTest.java&r1=1376420&r2=1376912&rev=1376912&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-annotate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementTest.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/annotate/DBPSpotlightAnnotateEnhancementTest.java Fri Aug 24 13:48:52 2012
@@ -33,8 +33,8 @@ import org.apache.clerezza.rdf.core.UriR
 import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
 import org.apache.clerezza.rdf.core.impl.TripleImpl;
 import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
-import org.apache.stanbol.enhancer.engines.dbpspotlight.annotate.Annotation;
-import org.apache.stanbol.enhancer.engines.dbpspotlight.annotate.DBPSpotlightAnnotateEnhancementEngine;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.Constants;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.model.Annotation;
 import org.apache.stanbol.enhancer.servicesapi.Blob;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
@@ -48,7 +48,6 @@ import org.junit.Assert;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
-import org.osgi.service.cm.ConfigurationException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -66,9 +65,9 @@ public class DBPSpotlightAnnotateEnhance
 	private static final Logger LOG = LoggerFactory
 			.getLogger(DBPSpotlightAnnotateEnhancementTest.class);
 	private static String SPL_URL = System
-			.getProperty(DBPSpotlightAnnotateEnhancementEngine.SL_URL_KEY) == null ? "http://spotlight.dbpedia.org/rest/annotate"
+			.getProperty(Constants.PARAM_URL_KEY) == null ? "http://spotlight.dbpedia.org/rest/annotate"
 			: (String) System
-					.getProperty(DBPSpotlightAnnotateEnhancementEngine.SL_URL_KEY);
+					.getProperty(Constants.PARAM_URL_KEY);
 	private static String TEST_TEXT = "President Obama is meeting Angela Merkel in Berlin on Monday";
 	private static DBPSpotlightAnnotateEnhancementEngine dbpslight;
 

Copied: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementTest.java (from r1376420, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementTest.java)
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementTest.java?p2=incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementTest.java&p1=incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementTest.java&r1=1376420&r2=1376912&rev=1376912&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-candidates/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementTest.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/candidates/DBPSpotlightCandidatesEnhancementTest.java Fri Aug 24 13:48:52 2012
@@ -33,8 +33,8 @@ import org.apache.clerezza.rdf.core.UriR
 import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
 import org.apache.clerezza.rdf.core.impl.TripleImpl;
 import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
-import org.apache.stanbol.enhancer.engines.dbpspotlight.candidates.DBPSpotlightCandidatesEnhancementEngine;
-import org.apache.stanbol.enhancer.engines.dbpspotlight.candidates.SurfaceForm;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.Constants;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.model.SurfaceForm;
 import org.apache.stanbol.enhancer.servicesapi.Blob;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
@@ -48,7 +48,6 @@ import org.junit.Assert;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
-import org.osgi.service.cm.ConfigurationException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -66,9 +65,9 @@ public class DBPSpotlightCandidatesEnhan
 	private static final Logger LOG = LoggerFactory
 			.getLogger(DBPSpotlightCandidatesEnhancementTest.class);
 	private static String SPL_URL = System
-			.getProperty(DBPSpotlightCandidatesEnhancementEngine.SL_URL_KEY) == null ? "http://spotlight.dbpedia.org/rest/candidates"
+			.getProperty(Constants.PARAM_URL_KEY) == null ? "http://spotlight.dbpedia.org/rest/candidates"
 			: (String) System
-					.getProperty(DBPSpotlightCandidatesEnhancementEngine.SL_URL_KEY);
+					.getProperty(Constants.PARAM_URL_KEY);
 	private static String TEST_TEXT = "President Obama is meeting Angela Merkel in Berlin on Monday.";
 	private static DBPSpotlightCandidatesEnhancementEngine dbpslight;
 

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementTest.java?rev=1376912&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementTest.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementTest.java Fri Aug 24 13:48:52 2012
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlight.disambiguate;
+
+import static org.apache.stanbol.enhancer.servicesapi.EnhancementEngine.ENHANCE_ASYNC;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map.Entry;
+
+import org.apache.clerezza.rdf.core.Language;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.commons.io.IOUtils;
+import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.Constants;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.model.Annotation;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.spot.DBPSpotlightSpotEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
+import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This class provides a JUnit test for the DBpedia Spotlight Disambiguate
+ * EnhancementEngine.
+ * 
+ * @author Iavor Jelev, babelmonkeys / GzEvD
+ */
+public class DBPSpotlightDisambiguateEnhancementTest {
+
+	/**
+	 * This contains the logger.
+	 */
+	private static final Logger LOG = LoggerFactory
+			.getLogger(DBPSpotlightDisambiguateEnhancementTest.class);
+	private static String SPL_URL = System
+			.getProperty(Constants.PARAM_URL_KEY) == null ? "http://spotlight.dbpedia.org/rest/annotate"
+			: (String) System
+					.getProperty(Constants.PARAM_URL_KEY);
+	private static String TEST_TEXT = "President Obama is meeting Angela Merkel in Berlin on Monday.";
+	private static DBPSpotlightDisambiguateEnhancementEngine dbpslight;
+	private static String testFile = "spots.xml";
+	private static String spotsXml;
+
+	private static ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();
+	
+	private ContentItem ci;
+	private static Entry<UriRef, Blob> textContentPart;
+
+	@BeforeClass
+	public static void oneTimeSetup() throws Exception {
+		dbpslight = new DBPSpotlightDisambiguateEnhancementEngine(new URL(SPL_URL));
+	}
+	
+	@Before
+	public void initTest() throws IOException {
+		//create the contentItem for testing
+		ci = ciFactory.createContentItem(new StringSource(TEST_TEXT));
+		assertNotNull(ci);
+		textContentPart = ContentItemHelper.getBlob(ci, Collections.singleton("text/plain"));
+		assertNotNull(textContentPart);
+		//add the language of the text
+		ci.getMetadata().add(new TripleImpl(ci.getUri(), Properties.DC_LANGUAGE, 
+				new PlainLiteralImpl("en")));
+		assertEquals("en", EnhancementEngineHelper.getLanguage(ci));
+		
+		LiteralFactory lf = LiteralFactory.getInstance();
+
+		//we need also to create a fise:TextAnnotation to test disambiguation
+		String selected = "Angela Merkel";
+		Language en = new Language("en");
+		UriRef textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, 
+				new DBPSpotlightSpotEnhancementEngine());
+		MGraph model = ci.getMetadata();
+		model.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTED_TEXT, 
+				new PlainLiteralImpl(selected,en)));
+		model.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTION_CONTEXT, 
+				new PlainLiteralImpl(TEST_TEXT,en)));
+		model.add(new TripleImpl(textAnnotation, Properties.ENHANCER_START, 
+				lf.createTypedLiteral(TEST_TEXT.indexOf(selected))));
+		model.add(new TripleImpl(textAnnotation, Properties.ENHANCER_END, 
+				lf.createTypedLiteral(TEST_TEXT.indexOf(selected)+selected.length())));
+		model.add(new TripleImpl(textAnnotation, Properties.DC_TYPE, 
+				OntologicalClasses.DBPEDIA_PERSON));
+		//validate that the created TextAnnotation is valid (test the test ...)
+		EnhancementStructureHelper.validateAllTextAnnotations(model, TEST_TEXT, null);
+	}
+
+	@Test
+	public void testEntityExtraction() {
+		Collection<Annotation> entities;
+		try {
+			spotsXml = IOUtils.toString(this.getClass().getClassLoader()
+					.getResourceAsStream(testFile));
+			System.out.println(SPL_URL);
+			entities = dbpslight.doPostRequest(TEST_TEXT, spotsXml,ci.getUri());
+			LOG.info("Found entities: {}", entities.size());
+			LOG.debug("Entities:\n{}", entities);
+			Assert.assertFalse("No entities were found!", entities.isEmpty());
+		} catch (Exception e) {
+			Assert.assertFalse("An EngineException occurred! The message was: "
+					+ e.getMessage(), true);
+		}
+	}
+	@Test
+	public void testCanEnhance() throws EngineException {
+		assertEquals(ENHANCE_ASYNC, dbpslight.canEnhance(ci));
+	}
+	
+	/**
+	 * Validates the Enhancements created by this engine
+	 * @throws EngineException
+	 */
+	@Test
+	public void testEnhancement() throws EngineException {
+		dbpslight.computeEnhancements(ci);
+        HashMap<UriRef,Resource> expectedValues = new HashMap<UriRef,Resource>();
+        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
+        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(
+        		dbpslight.getClass().getName()));
+		//validate fise:EntityAnnotations
+		EnhancementStructureHelper.validateAllEntityAnnotations(
+				ci.getMetadata(), expectedValues);
+	}
+}

Copied: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementTest.java (from r1376420, incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementTest.java)
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementTest.java?p2=incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementTest.java&p1=incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementTest.java&r1=1376420&r2=1376912&rev=1376912&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementTest.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementTest.java Fri Aug 24 13:48:52 2012
@@ -16,7 +16,6 @@
  */
 package org.apache.stanbol.enhancer.engines.dbpspotlight.spot;
 
-import static org.apache.stanbol.enhancer.engines.dbpspotlight.spot.DBPSpotlightSpotEnhancementEngine.SL_URL_KEY;
 import static org.apache.stanbol.enhancer.servicesapi.EnhancementEngine.ENHANCE_ASYNC;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
@@ -35,23 +34,20 @@ import org.apache.clerezza.rdf.core.UriR
 import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
 import org.apache.clerezza.rdf.core.impl.TripleImpl;
 import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
-import org.apache.stanbol.enhancer.engines.dbpspotlight.spot.DBPSpotlightSpotEnhancementEngine;
-import org.apache.stanbol.enhancer.engines.dbpspotlight.spot.SurfaceForm;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.Constants;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.model.SurfaceForm;
 import org.apache.stanbol.enhancer.servicesapi.Blob;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
-import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
 import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
 import org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper;
-import org.junit.Assert;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
-import org.osgi.service.cm.ConfigurationException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -69,9 +65,9 @@ public class DBPSpotlightSpotEnhancement
 	private static final Logger LOG = LoggerFactory
 			.getLogger(DBPSpotlightSpotEnhancementTest.class);
 	private static String SPL_URL = System
-			.getProperty(SL_URL_KEY) == null ? 
+			.getProperty(Constants.PARAM_URL_KEY) == null ? 
 					"http://spotlight.dbpedia.org/rest/spot" : 
-						(String) System.getProperty(SL_URL_KEY);
+						(String) System.getProperty(Constants.PARAM_URL_KEY);
 	private static String TEST_TEXT = "President Obama is meeting Angela Merkel in Berlin on Monday";
 	private static DBPSpotlightSpotEnhancementEngine dbpslight;
 

Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/pom.xml?rev=1376912&r1=1376911&r2=1376912&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/pom.xml (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/pom.xml Fri Aug 24 13:48:52 2012
@@ -57,9 +57,6 @@
     <module>opencalais</module> <!-- http://opencalais.com/ -->
     <module>zemanta</module> <!-- htt://zemanta.com -->
     <!-- DBpedia.org Spotlight Enhancement Engines (STANBOL-706) -->
-    <module>dbpedia-spotlight-annotate</module>
-    <module>dbpedia-spotlight-candidates</module>
-    <module>dbpedia-spotlight-disambiguate</module>
-    <module>dbpedia-spotlight-spot</module>
+    <module>dbpedia-spotlight</module>
   </modules>
 </project>

Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java?rev=1376912&r1=1376911&r2=1376912&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java Fri Aug 24 13:48:52 2012
@@ -420,7 +420,8 @@ public class EnhancementStructureHelper 
             XSD.dateTime.equals(((TypedLiteral)createdResource).getDataType()));
         Date creationDate = LiteralFactory.getInstance().createObject(Date.class, (TypedLiteral)createdResource);
         assertNotNull("Unable to convert "+createdResource+" to a Java Date object",creationDate);
-        assertTrue("CreationDate MUST NOT be in the Future",new Date().after(creationDate));
+        Date now = new Date();
+        assertTrue("CreationDate MUST NOT be in the Future",now.after(creationDate) || now.equals(creationDate));
         assertFalse("Only a single createnDate MUST BE present", createdIterator.hasNext());
         //validate optional modification date if present
         Iterator<Triple> modDateIterator = enhancements.filter(enhancement, DCTERMS.modified, null);