You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/08/20 19:14:58 UTC

svn commit: r1375110 [2/2] - in /incubator/stanbol/branches/dbpedia-spotlight-engines/engines: ./ dbpedia-spotlight-annotate/ dbpedia-spotlight-annotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/ dbpedia-spotlight-annotate/src/main...

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementEngine.java?rev=1375110&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementEngine.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/DBPSpotlightDisambiguateEnhancementEngine.java Mon Aug 20 17:14:56 2012
@@ -0,0 +1,497 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlight.disambiguate;
+
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_LABEL;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_TYPE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Dictionary;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.Language;
+import org.apache.clerezza.rdf.core.Literal;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.core.serializedform.Serializer;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import org.apache.stanbol.enhancer.servicesapi.helper.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+
+/**
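+ * {@link DBPSpotlightDisambiguateEnhancementEngine} sends the existing
+ * TextAnnotations of a content item to the DBpedia Spotlight REST service for
+ * disambiguation and adds an EntityAnnotation for each returned DBpedia
+ * resource.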
+ * 
+ * @author Iavor Jelev, Babelmonkeys (GzEvD)
+ */
+@Component(metatype = true, immediate = true, label = "%stanbol.DBPSpotlightDisambiguateEnhancementEngine.name", description = "%stanbol.DBPSpotlightDisambiguateEnhancementEngine.description")
+@Service
+@Properties(value = { @Property(name = EnhancementEngine.PROPERTY_NAME, value = "dbpspotlightdisambiguate") })
+public class DBPSpotlightDisambiguateEnhancementEngine extends
+		AbstractEnhancementEngine<IOException, RuntimeException> implements
+		EnhancementEngine, ServiceProperties {
+
+	// All parameters which can be used to configure the EnhancementEngine.
+	// Each constant is a configuration key; the value of the @Property
+	// annotation (if any) is the default declared for that key.
+
+	/** Configuration key of the DBpedia Spotlight REST endpoint url. */
+	@Property(value = "http://spotlight.dbpedia.org/rest/annotate")
+	public static final String SL_URL_KEY = "stanbol.DBPSpotlightDisambiguateEnhancementEngine.url";
+
+	/** Configuration key of the value sent as "disambiguator" request parameter. */
+	@Property(value = "Document")
+	public static final String SL_DISAMBIGUATOR = "stanbol.DBPSpotlightDisambiguateEnhancementEngine.disambiguator";
+
+	/** Configuration key of the value sent as "types" request parameter. */
+	@Property()
+	public static final String SL_RESTRICTION = "stanbol.DBPSpotlightDisambiguateEnhancementEngine.types";
+
+	/** Configuration key of the value sent as "sparql" request parameter. */
+	@Property()
+	public static final String SL_SPARQL = "stanbol.DBPSpotlightDisambiguateEnhancementEngine.sparql";
+
+	/** Configuration key of the value sent as "support" request parameter. */
+	@Property()
+	public static final String SL_SUPPORT = "stanbol.DBPSpotlightDisambiguateEnhancementEngine.support";
+
+	/** Configuration key of the value sent as "confidence" request parameter. */
+	@Property()
+	public static final String SL_CONFIDENCE = "stanbol.DBPSpotlightDisambiguateEnhancementEngine.confidence";
+
+	/**
+	 * The default value for the execution order of this engine. Set to
+	 * {@link ServiceProperties#ORDERING_CONTENT_EXTRACTION} - 31.
+	 */
+	public static final Integer defaultOrder = ORDERING_CONTENT_EXTRACTION - 31;
+
+	/**
+	 * This contains the only MIME type directly supported by this enhancement
+	 * engine.
+	 */
+	private static final String TEXT_PLAIN_MIMETYPE = "text/plain";
+	/** Set containing the only supported mime type {@link #TEXT_PLAIN_MIMETYPE} */
+	private static final Set<String> SUPPORTED_MIMTYPES = Collections
+			.singleton(TEXT_PLAIN_MIMETYPE);
+	/** This contains the logger. */
+	private static final Logger log = LoggerFactory
+			.getLogger(DBPSpotlightDisambiguateEnhancementEngine.class);
+	/** holds the url of the Spotlight REST endpoint */
+	private String spotlightUrl;
+	/** holds the name of the disambiguator to be used */
+	private String spotlightDisambiguator;
+	/** holds the type restriction for the results, if the user wishes one */
+	private String spotlightTypesRestriction;
+	/** holds the chosen minimal support value */
+	private String spotlightSupport;
+	/** holds the chosen minimal confidence value */
+	private String spotlightConfidence;
+	/** holds the sparql restriction for the results, if the user wishes one */
+	private String spotlightSparql;
+	/**
+	 * Holds the existing TextAnnotations, keyed by their selected text. They
+	 * are used as input for DBpedia Spotlight, and later for linking of the
+	 * results.
+	 * <p>
+	 * NOTE(review): this instance field is replaced on every call of
+	 * <code>getSpottedXml</code> and read by <code>createEnhancements</code>;
+	 * if one engine instance processes several content items concurrently
+	 * the requests would share this map - confirm and consider passing the
+	 * map explicitly.
+	 */
+	private Hashtable<String, UriRef> textAnnotationsMap;
+
+	/**
+	 * Reads the engine configuration from the component properties. The
+	 * endpoint url falls back to the public DBpedia Spotlight service, support
+	 * and confidence fall back to "-1"; all other settings stay
+	 * <code>null</code> when they are not configured.
+	 * 
+	 * @param ce
+	 *            the {@link ComponentContext} providing the properties
+	 * @throws ConfigurationException
+	 *             declared for the activation in the super class
+	 * @throws IOException
+	 *             declared for the activation in the super class
+	 */
+	@SuppressWarnings("unchecked")
+	protected void activate(ComponentContext ce) throws ConfigurationException,
+			IOException {
+
+		super.activate(ce);
+
+		Dictionary<String, Object> config = ce.getProperties();
+
+		// settings with a default value
+		Object url = config.get(SL_URL_KEY);
+		if (url == null) {
+			spotlightUrl = "http://spotlight.dbpedia.org/rest/annotate";
+		} else {
+			spotlightUrl = (String) url;
+		}
+		Object support = config.get(SL_SUPPORT);
+		if (support == null) {
+			spotlightSupport = "-1";
+		} else {
+			spotlightSupport = (String) support;
+		}
+		Object confidence = config.get(SL_CONFIDENCE);
+		if (confidence == null) {
+			spotlightConfidence = "-1";
+		} else {
+			spotlightConfidence = (String) confidence;
+		}
+
+		// optional settings: a missing value is kept as null
+		spotlightDisambiguator = (String) config.get(SL_DISAMBIGUATOR);
+		spotlightTypesRestriction = (String) config.get(SL_RESTRICTION);
+		spotlightSparql = (String) config.get(SL_SPARQL);
+	}
+
+	/**
+	 * Check if the content can be enhanced
+	 * 
+	 * @param ci
+	 *            the {@link ContentItem}
+	 */
+	public int canEnhance(ContentItem ci) throws EngineException {
+		if (ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) != null) {
+			return ENHANCE_SYNCHRONOUS;
+		} else {
+			return CANNOT_ENHANCE;
+		}
+	}
+
+	/**
+	 * Calculates the enhancements by doing a POST request to the DBpedia
+	 * Spotlight endpoint and processing the results.
+	 * <p>
+	 * The existing TextAnnotations are read while holding the read lock of
+	 * the content item, the remote request is done without any lock, and the
+	 * resulting enhancements are added while holding the write lock.
+	 * 
+	 * @param ci
+	 *            the {@link ContentItem}
+	 * @throws EngineException
+	 *             if the text cannot be read or the request to DBpedia
+	 *             Spotlight fails
+	 * @throws IllegalStateException
+	 *             if the content item has no "text/plain" content part
+	 */
+	public void computeEnhancements(ContentItem ci) throws EngineException {
+		Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(ci,
+				SUPPORTED_MIMTYPES);
+		if (contentPart == null) {
+			throw new IllegalStateException(
+					"No ContentPart with Mimetype '"
+							+ TEXT_PLAIN_MIMETYPE
+							+ "' found for ContentItem "
+							+ ci.getUri()
+							+ ": This is also checked in the canEnhance method! -> This "
+							+ "indicated an Bug in the implementation of the "
+							+ "EnhancementJobManager!");
+		}
+		String text;
+		try {
+			text = ContentItemHelper.getText(contentPart.getValue());
+		} catch (IOException e) {
+			throw new InvalidContentException(this, ci, e);
+		}
+
+		// Retrieve the existing text annotations; iterating over the metadata
+		// requires the read lock
+		String xmlTextAnnotations;
+		ci.getLock().readLock().lock();
+		try {
+			xmlTextAnnotations = this.getSpottedXml(text, ci.getMetadata());
+		} finally {
+			ci.getLock().readLock().unlock();
+		}
+
+		// the remote call is intentionally done without holding any lock
+		Collection<Annotation> dbpslGraph = doPostRequest(text,
+				xmlTextAnnotations);
+		if (dbpslGraph == null) {
+			return;
+		}
+
+		// Acquire a write lock on the ContentItem when adding the
+		// enhancements
+		ci.getLock().writeLock().lock();
+		try {
+			createEnhancements(dbpslGraph, ci);
+			if (log.isDebugEnabled()) {
+				Serializer serializer = Serializer.getInstance();
+				ByteArrayOutputStream debugStream = new ByteArrayOutputStream();
+				serializer.serialize(debugStream, ci.getMetadata(),
+						"application/rdf+xml");
+				try {
+					log.debug("DBpedia Enhancements:\n{}",
+							debugStream.toString("UTF-8"));
+				} catch (UnsupportedEncodingException e) {
+					// only affects the debug output, so report it via the
+					// logger instead of printing to stderr
+					log.warn("Unable to log the DBpedia enhancements", e);
+				}
+			}
+		} finally {
+			ci.getLock().writeLock().unlock();
+		}
+	}
+
+	/**
+	 * Adds the returned DBpedia Spotlight annotations to the content item's
+	 * metadata. For each annotation whose surface form matches the selected
+	 * text of a known TextAnnotation, an EntityAnnotation is created and
+	 * linked to that TextAnnotation; annotations without a match are ignored.
+	 * 
+	 * @param occs
+	 *            a Collection of entity information
+	 * @param ci
+	 *            the content item
+	 */
+	public void createEnhancements(Collection<Annotation> occs,
+			ContentItem ci) {
+		// language used for plain literals representing parts of the content
+		final Language language;
+		String langString = getMetadataLanguage(ci.getMetadata(), null);
+		if (langString != null && !langString.isEmpty()) {
+			language = new Language(langString);
+		} else {
+			language = null;
+		}
+
+		MGraph model = ci.getMetadata();
+		for (Annotation occ : occs) {
+			UriRef textAnnotation = textAnnotationsMap.get(occ.surfaceForm);
+			if (textAnnotation == null) {
+				// Spotlight returned a surface form that was not sent
+				continue;
+			}
+			UriRef entityAnnotation = EnhancementEngineHelper
+					.createEntityEnhancement(ci, this);
+			Literal label = new PlainLiteralImpl(occ.surfaceForm, language);
+			model.add(new TripleImpl(entityAnnotation, DC_RELATION,
+					textAnnotation));
+			model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL,
+					label));
+
+			HashSet<String> typeNames = occ.getTypeNames();
+			if (typeNames != null) {
+				for (String typeName : typeNames) {
+					model.add(new TripleImpl(entityAnnotation,
+							ENHANCER_ENTITY_TYPE, new UriRef(typeName)));
+				}
+			}
+			model.add(new TripleImpl(entityAnnotation,
+					ENHANCER_ENTITY_REFERENCE, occ.uri));
+		}
+	}
+
+	/**
+	 * Sends a POST request to the DBpediaSpotlight url.
+	 * 
+	 * @param text
+	 *            a <code>String</code> with the text to be analyzed
+	 * @param xmlTextAnnotations
+	 * @param textAnnotations
+	 * @return a <code>String</code> with the server response
+	 * @throws EngineException
+	 *             if the request cannot be sent
+	 */
+	public Collection<Annotation> doPostRequest(String text,
+			String xmlTextAnnotations) throws EngineException {
+		StringBuilder data = new StringBuilder();
+
+		try {
+			data.append(URLEncoder.encode("spotter=SpotXmlParser", "UTF-8")
+					+ "&");
+			if (spotlightDisambiguator != null
+					&& !spotlightDisambiguator.isEmpty())
+				data.append(URLEncoder.encode("disambiguator", "UTF-8") + "="
+						+ URLEncoder.encode(spotlightDisambiguator, "UTF-8")
+						+ "&");
+			if (spotlightTypesRestriction != null
+					&& !spotlightTypesRestriction.isEmpty())
+				data.append(URLEncoder.encode("types", "UTF-8") + "="
+						+ URLEncoder.encode(spotlightTypesRestriction, "UTF-8")
+						+ "&");
+			if (spotlightSupport != null && !spotlightSupport.isEmpty())
+				data.append(URLEncoder.encode("support", "UTF-8") + "="
+						+ URLEncoder.encode(spotlightSupport, "UTF-8") + "&");
+			if (spotlightConfidence != null && !spotlightConfidence.isEmpty())
+				data.append(URLEncoder.encode("confidence", "UTF-8") + "="
+						+ URLEncoder.encode(spotlightConfidence, "UTF-8") + "&");
+			if (spotlightSparql != null && !spotlightSparql.isEmpty()
+					&& spotlightTypesRestriction == null)
+				data.append(URLEncoder.encode("sparql", "UTF-8") + "="
+						+ URLEncoder.encode(spotlightSparql, "UTF-8") + "&");
+			data.append(URLEncoder.encode("text", "UTF-8") + "="
+					+ URLEncoder.encode(xmlTextAnnotations, "UTF-8"));
+		} catch (UnsupportedEncodingException e) {
+			throw new EngineException(
+					"Data for the httprequest could not be converted. Error: "
+							+ e.getMessage());
+		}
+
+		HttpURLConnection connection = null;
+		StringBuffer response = new StringBuffer();
+
+		try {
+			// Create connection
+			URL url = new URL(spotlightUrl);
+			connection = (HttpURLConnection) url.openConnection();
+			connection.setRequestMethod("POST");
+			connection.setRequestProperty("Content-Type",
+					"application/x-www-form-urlencoded");
+			connection.setRequestProperty("Accept", "text/xml");
+
+			connection.setUseCaches(false);
+			connection.setDoInput(true);
+			connection.setDoOutput(true);
+
+			// Send request
+			DataOutputStream wr = new DataOutputStream(
+					connection.getOutputStream());
+			wr.writeBytes(data.toString());
+			wr.flush();
+			wr.close();
+
+			// Get Response
+			InputStream is = connection.getInputStream();
+			BufferedReader rd = new BufferedReader(new InputStreamReader(is));
+			String line;
+			while ((line = rd.readLine()) != null) {
+				response.append(line);
+				response.append('\r');
+			}
+			rd.close();
+
+		} catch (Exception e) {
+			log.error("[request - error] The following error occurred: "
+					+ e.getMessage());
+
+		} finally {
+
+			if (connection != null) {
+				connection.disconnect();
+			}
+		}
+
+		XMLParser xmlParser = new XMLParser();
+		try {
+			Document xmlDoc = xmlParser.loadXMLFromString(response.toString());
+			NodeList nlist = xmlParser.getElementsByTagName(xmlDoc, "Resource");
+			Collection<Annotation> annos = this.getAnnotations(nlist);
+
+			return annos;
+		} catch (Exception e) {
+			throw new EngineException(
+					"Response XML could not be parsed. Error: "
+							+ e.getMessage());
+		}
+	}
+
+	/**
+	 * Creates the spotted-XML document that is sent to DBpedia Spotlight: an
+	 * <code>annotation</code> element carrying the text, with one
+	 * <code>surfaceForm</code> child (name and offset) for every existing
+	 * TextAnnotation that has a selected text.
+	 * <p>
+	 * As a side effect {@link #textAnnotationsMap} is replaced by a new map
+	 * from the selected text to the TextAnnotation. Problems while reading
+	 * the graph are logged; the XML built so far is still returned.
+	 * 
+	 * @param text
+	 *            the text of the content item
+	 * @param graph
+	 *            the metadata containing the TextAnnotations
+	 * @return the XML document as String
+	 */
+	private String getSpottedXml(String text, MGraph graph) {
+		StringBuilder xml = new StringBuilder();
+		textAnnotationsMap = new Hashtable<String, UriRef>();
+
+		// all values are written into attributes and need to be escaped,
+		// otherwise quotes, '<' or '&' in the text produce malformed XML
+		xml.append("<annotation text=\"").append(escapeXmlAttribute(text))
+				.append("\">");
+		try {
+			for (Iterator<Triple> it = graph.filter(null, RDF_TYPE,
+					TechnicalClasses.ENHANCER_TEXTANNOTATION); it.hasNext();) {
+				NonLiteral subject = it.next().getSubject();
+				if (!(subject instanceof UriRef)) {
+					// skip this annotation instead of aborting the whole
+					// loop with a ClassCastException
+					continue;
+				}
+				UriRef uri = (UriRef) subject;
+				String surfaceForm = EnhancementEngineHelper.getString(graph,
+						uri, ENHANCER_SELECTED_TEXT);
+				if (surfaceForm != null) {
+					String offset = EnhancementEngineHelper.getString(graph,
+							uri, ENHANCER_START);
+					textAnnotationsMap.put(surfaceForm, uri);
+					xml.append("<surfaceForm name=\"")
+							.append(escapeXmlAttribute(surfaceForm))
+							.append("\" offset=\"")
+							.append(escapeXmlAttribute(offset))
+							.append("\"/>");
+				}
+			}
+		} catch (Exception e) {
+			log.error("Unable to read the existing TextAnnotations: "
+					+ e.getMessage(), e);
+		}
+
+		return xml.append("</annotation>").toString();
+	}
+
+	/**
+	 * Escapes a value for the use within a double quoted XML attribute. Tab,
+	 * line feed and carriage return are written as character references
+	 * because an XML parser would otherwise normalize them to spaces.
+	 */
+	private static String escapeXmlAttribute(String value) {
+		if (value == null) {
+			// same output as formatting a null value with "%s"
+			return "null";
+		}
+		StringBuilder escaped = new StringBuilder(value.length() + 16);
+		for (int i = 0; i < value.length(); i++) {
+			char c = value.charAt(i);
+			switch (c) {
+			case '&':
+				escaped.append("&amp;");
+				break;
+			case '<':
+				escaped.append("&lt;");
+				break;
+			case '>':
+				escaped.append("&gt;");
+				break;
+			case '"':
+				escaped.append("&quot;");
+				break;
+			case '\'':
+				escaped.append("&apos;");
+				break;
+			case '\t':
+				escaped.append("&#9;");
+				break;
+			case '\n':
+				escaped.append("&#10;");
+				break;
+			case '\r':
+				escaped.append("&#13;");
+				break;
+			default:
+				escaped.append(c);
+			}
+		}
+		return escaped.toString();
+	}
+
+	/**
+	 * Creates the Collection of Annotations, which the method
+	 * <code>createEnhancements</code> adds to the meta data of the content
+	 * item. One Annotation is created for every element of the list, using
+	 * its URI, support, types, surfaceForm, offset, similarityScore and
+	 * percentageOfSecondRank attributes.
+	 * 
+	 * @param nList
+	 *            NodeList of all Resources contained in the XML response from
+	 *            DBpedia Spotlight
+	 * @return a Collection with all annotations
+	 * @throws NumberFormatException
+	 *             if a numeric attribute is missing or not a valid number
+	 */
+	private Collection<Annotation> getAnnotations(NodeList nList) {
+		Collection<Annotation> dbpslAnnos = new HashSet<Annotation>();
+
+		for (int i = 0; i < nList.getLength(); i++) {
+			Element node = (Element) nList.item(i);
+			Annotation dbpslann = new Annotation();
+			dbpslann.uri = new UriRef(node.getAttribute("URI"));
+			// parse to primitives directly instead of creating boxed values
+			dbpslann.support = Integer.parseInt(node.getAttribute("support"));
+			dbpslann.types = node.getAttribute("types");
+			dbpslann.surfaceForm = node.getAttribute("surfaceForm");
+			dbpslann.offset = Integer.parseInt(node.getAttribute("offset"));
+			dbpslann.similarityScore = Double.parseDouble(node
+					.getAttribute("similarityScore"));
+			dbpslann.percentageOfSecondRank = Double.parseDouble(node
+					.getAttribute("percentageOfSecondRank"));
+
+			dbpslAnnos.add(dbpslann);
+		}
+
+		return dbpslAnnos;
+	}
+
+	public Map<String, Object> getServiceProperties() {
+		return Collections.unmodifiableMap(Collections.singletonMap(
+				ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder));
+	}
+
+	/**
+	 * Looks up the language stated in the graph by the first triple matching
+	 * the subject and the <code>DC_LANGUAGE</code> property.
+	 * 
+	 * @param model
+	 *            the graph to search
+	 * @param subj
+	 *            the subject used for the filter; this class passes
+	 *            <code>null</code> here
+	 * @return the lexical form of the language value or <code>null</code> if
+	 *         no such triple exists
+	 */
+	public String getMetadataLanguage(MGraph model, NonLiteral subj) {
+		Iterator<Triple> languages = model.filter(subj, DC_LANGUAGE, null);
+		if (!languages.hasNext()) {
+			return null;
+		}
+		return getLexicalForm(languages.next().getObject());
+	}
+
+	/**
+	 * Converts a resource to a String: the lexical form for literals and the
+	 * <code>toString()</code> representation for all other resources.
+	 * 
+	 * @param res
+	 *            the resource, may be <code>null</code>
+	 * @return the String value or <code>null</code> if the resource is
+	 *         <code>null</code>
+	 */
+	public String getLexicalForm(Resource res) {
+		if (res == null) {
+			return null;
+		}
+		return (res instanceof Literal) ? ((Literal) res).getLexicalForm()
+				: res.toString();
+	}
+
+	/**
+	 * Sets the url of the Spotlight endpoint used for the requests. The test
+	 * class uses this method to configure the engine.
+	 * 
+	 * @param url
+	 *            the url of the Spotlight endpoint
+	 */
+	public void setEndpointUrl(String url) {
+		this.spotlightUrl = url;
+	}
+
+}

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/XMLParser.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/XMLParser.java?rev=1375110&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/XMLParser.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/XMLParser.java Mon Aug 20 17:14:56 2012
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlight.disambiguate;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
+
+/**
+ * Parses the XML results given by DBPedia Spotlight.
+ * 
+ * @author <a href="mailto:iavor.jelev@babelmonkeys.com">Iavor Jelev</a>
+ */
+public class XMLParser {
+
+	/**
+	 * Returns all elements of the document with the given tag name.
+	 * 
+	 * @param doc
+	 *            the document to search
+	 * @param tagName
+	 *            the name of the elements
+	 * @return the matching elements
+	 */
+	public NodeList getElementsByTagName(Document doc, String tagName) {
+
+		return doc.getElementsByTagName(tagName);
+	}
+
+	/**
+	 * Parses the XML document contained in the String.
+	 * 
+	 * @param xml
+	 *            the XML document
+	 * @return the parsed and normalized document
+	 * @throws SAXException
+	 *             if the XML cannot be parsed
+	 * @throws IOException
+	 *             on errors while reading the data
+	 */
+	public Document loadXMLFromString(String xml) throws SAXException,
+			IOException {
+		// NOTE(review): the String is encoded with the platform default
+		// charset, which only matches the encoding assumed by the parser if
+		// callers decoded their data the same way - confirm before changing
+		// this to a fixed charset.
+		return loadXMLFromInputStream(new ByteArrayInputStream(
+				xml.getBytes()));
+	}
+
+	/**
+	 * Parses the XML document provided by the stream with a namespace aware
+	 * parser. The stream is closed by this method, also when parsing fails.
+	 * 
+	 * @param is
+	 *            the stream providing the XML document
+	 * @return the parsed and normalized document
+	 * @throws SAXException
+	 *             if the XML cannot be parsed
+	 * @throws IOException
+	 *             on errors while reading or closing the stream
+	 * @throws IllegalStateException
+	 *             if no XML parser can be created
+	 */
+	public Document loadXMLFromInputStream(InputStream is) throws SAXException,
+			IOException {
+		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+		factory.setNamespaceAware(true);
+		try {
+			// The data may come from a remote service: reject DOCTYPE
+			// declarations to avoid the processing of external entities.
+			factory.setFeature(
+					"http://apache.org/xml/features/disallow-doctype-decl",
+					true);
+		} catch (ParserConfigurationException ignored) {
+			// best effort: the feature is not supported by this parser
+		}
+		DocumentBuilder builder;
+		try {
+			builder = factory.newDocumentBuilder();
+		} catch (ParserConfigurationException ex) {
+			// fail with the real cause instead of a later
+			// NullPointerException
+			throw new IllegalStateException(
+					"Unable to create a DocumentBuilder", ex);
+		}
+		Document doc;
+		try {
+			doc = builder.parse(is);
+		} finally {
+			if (is != null) {
+				is.close();
+			}
+		}
+		doc.getDocumentElement().normalize();
+
+		return doc;
+	}
+
+	/**
+	 * Parses the XML document stored in the file.
+	 * 
+	 * @param filePath
+	 *            the path of the file
+	 * @return the parsed and normalized document
+	 * @throws ParserConfigurationException
+	 *             if no XML parser can be created
+	 * @throws SAXException
+	 *             if the XML cannot be parsed
+	 * @throws IOException
+	 *             on errors while reading the file
+	 */
+	public Document loadXMLFromFile(String filePath)
+			throws ParserConfigurationException, SAXException, IOException {
+		File fXmlFile = new File(filePath);
+		DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
+		DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
+		Document doc = dBuilder.parse(fXmlFile);
+		doc.getDocumentElement().normalize();
+
+		return doc;
+	}
+}
\ No newline at end of file

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/core/DBPSpotlightDisambiguateEnhancementTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/core/DBPSpotlightDisambiguateEnhancementTest.java?rev=1375110&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/core/DBPSpotlightDisambiguateEnhancementTest.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-disambiguate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/disambiguate/core/DBPSpotlightDisambiguateEnhancementTest.java Mon Aug 20 17:14:56 2012
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlight.disambiguate.core;
+
+import java.util.Collection;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.disambiguate.Annotation;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.disambiguate.DBPSpotlightDisambiguateEnhancementEngine;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.osgi.service.cm.ConfigurationException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This class provides a JUnit test for the DBpedia Spotlight Disambiguate
+ * EnhancementEngine. The test sends a request to the Spotlight endpoint
+ * configured by the system property
+ * {@link DBPSpotlightDisambiguateEnhancementEngine#SL_URL_KEY} (default: the
+ * public DBpedia Spotlight service).
+ * 
+ * @author Iavor Jelev, babelmonkeys / GzEvD
+ */
+public class DBPSpotlightDisambiguateEnhancementTest {
+
+	/**
+	 * This contains the logger.
+	 */
+	private static final Logger LOG = LoggerFactory
+			.getLogger(DBPSpotlightDisambiguateEnhancementTest.class);
+	/** The endpoint used by the test. */
+	private static String SPL_URL = System.getProperty(
+			DBPSpotlightDisambiguateEnhancementEngine.SL_URL_KEY,
+			"http://spotlight.dbpedia.org/rest/annotate");
+	private static String TEST_TEXT = "President Obama is meeting Angela Merkel in Berlin on Monday.";
+	private static DBPSpotlightDisambiguateEnhancementEngine dbpslight;
+	/** Classpath resource with the spotted-XML sent to the endpoint. */
+	private static String testFile = "spots.xml";
+	private static String spotsXml;
+
+	@BeforeClass
+	public static void oneTimeSetup() throws ConfigurationException {
+		dbpslight = new DBPSpotlightDisambiguateEnhancementEngine();
+		dbpslight.setEndpointUrl(SPL_URL);
+	}
+
+	/**
+	 * Sends the spots of the test resource to the endpoint and expects at
+	 * least one entity in the response. Exceptions are not caught, so that
+	 * JUnit reports them together with their stack trace.
+	 */
+	@Test
+	public void testEntityExtraction() throws Exception {
+		java.io.InputStream in = this.getClass().getClassLoader()
+				.getResourceAsStream(testFile);
+		Assert.assertNotNull("Test resource '" + testFile
+				+ "' not found on the classpath!", in);
+		try {
+			// assumes that the test resource is UTF-8 encoded
+			spotsXml = IOUtils.toString(in, "UTF-8");
+		} finally {
+			IOUtils.closeQuietly(in);
+		}
+		LOG.info("Spotlight endpoint: {}", SPL_URL);
+		Collection<Annotation> entities = dbpslight.doPostRequest(TEST_TEXT,
+				spotsXml);
+		LOG.info("Found entities: {}", entities.size());
+		LOG.debug("Entities:\n{}", entities);
+		Assert.assertFalse("No entities were found!", entities.isEmpty());
+	}
+}

Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/pom.xml?rev=1375110&r1=1375107&r2=1375110&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/pom.xml (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/pom.xml Mon Aug 20 17:14:56 2012
@@ -22,7 +22,7 @@
 	</parent>
 
 	<groupId>org.apache.stanbol</groupId>
-	<artifactId>org.apache.stanbol.enhancer.engines.dbpspotlightspot</artifactId>
+	<artifactId>org.apache.stanbol.enhancer.engines.dbpspotlight.spot</artifactId>
 	<packaging>bundle</packaging>
 
 	<name>Apache Stanbol Enhancer Enhancement Engine : DBPedia Spotlight Spot</name>
@@ -43,7 +43,7 @@
 				<configuration>
 					<instructions>
 						<Export-Package>
-							org.apache.stanbol.enhancer.engines.dbpspotlightspot;version=${project.version}
+							org.apache.stanbol.enhancer.engines.dbpspotlight.spot;version=${project.version}
 						</Export-Package>
 						<Embed-Dependency>
 						</Embed-Dependency>

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java?rev=1375110&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/DBPSpotlightSpotEnhancementEngine.java Mon Aug 20 17:14:56 2012
@@ -0,0 +1,429 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlight.spot;
+
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Dictionary;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.Language;
+import org.apache.clerezza.rdf.core.Literal;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.core.serializedform.Serializer;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import org.apache.stanbol.enhancer.servicesapi.helper.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+
+/**
+ * {@link DBPSpotlightSpotEnhancementEngine} adds a TextAnnotation for each
+ * surface form that the DBpedia Spotlight "spot" service finds in a text.
+ * 
+ * @author Iavor Jelev, Babelmonkeys (GzEvD)
+ */
+@Component(metatype = true, immediate = true, label = "%stanbol.DBPSpotlightSpotEnhancementEngine.name", description = "%stanbol.DBPSpotlightSpotEnhancementEngine.description")
+@Service
+@Properties(value = { @Property(name = EnhancementEngine.PROPERTY_NAME, value = "dbpspotlightspot") })
+public class DBPSpotlightSpotEnhancementEngine extends
+		AbstractEnhancementEngine<IOException, RuntimeException> implements
+		EnhancementEngine, ServiceProperties {
+
+	/**
+	 * Configuration key for the URL of the DBpedia Spotlight "spot" REST endpoint
+	 */
+	@Property(value = "http://spotlight.dbpedia.org/rest/spot")
+	public static final String SL_URL_KEY = "stanbol.DBPSpotlightSpotEnhancementEngine.url";
+
+	@Property(value = "LingPipeSpotter")
+	public static final String SL_SPOTTER = "stanbol.DBPSpotlightSpotEnhancementEngine.spotter";
+
+	/**
+	 * The default value for the Execution of this Engine. Currently set to
+	 * {@link ServiceProperties#ORDERING_CONTENT_EXTRACTION} - 29
+	 */
+	public static final Integer defaultOrder = ORDERING_CONTENT_EXTRACTION - 29;
+
+	/**
+	 * This contains the only MIME type directly supported by this enhancement
+	 * engine.
+	 */
+	private static final String TEXT_PLAIN_MIMETYPE = "text/plain";
+	/**
+	 * Set containing the only supported mime type {@link #TEXT_PLAIN_MIMETYPE}
+	 */
+	private static final Set<String> SUPPORTED_MIMTYPES = Collections
+			.singleton(TEXT_PLAIN_MIMETYPE);
+
+	/**
+	 * This contains a list of languages supported by DBpedia Spotlight. If the
+	 * metadata doesn't contain a value for the language as the value of the
+	 * {@link Property.DC_LANG property} the content can't be processed.
+	 */
+	protected static final Set<String> SUPPORTED_LANGUAGES = Collections
+			.unmodifiableSet(new HashSet<String>(Arrays.asList("en")));
+
+	/** holds the logger. */
+	private static final Logger log = LoggerFactory
+			.getLogger(DBPSpotlightSpotEnhancementEngine.class);
+
+	/** holds the url of the Spotlight REST endpoint */
+	private String spotlightUrl;
+	/** holds the name of the spotter to be used (null if none is configured) */
+	private String spotlightSpotter;
+
+	/**
+	 * Activates the engine and reads its settings from the component
+	 * configuration. The endpoint URL falls back to the public DBpedia
+	 * Spotlight spot service when it is not configured; the spotter stays
+	 * <code>null</code> when it is not configured.
+	 * 
+	 * @param ce
+	 *            the {@link ComponentContext}
+	 */
+	@SuppressWarnings("unchecked")
+	protected void activate(ComponentContext ce) throws ConfigurationException,
+			IOException {
+		super.activate(ce);
+		Dictionary<String, Object> config = ce.getProperties();
+
+		Object configuredUrl = config.get(SL_URL_KEY);
+		if (configuredUrl == null) {
+			spotlightUrl = "http://spotlight.dbpedia.org/rest/spot";
+		} else {
+			spotlightUrl = (String) configuredUrl;
+		}
+
+		// casting a missing (null) value simply yields null
+		spotlightSpotter = (String) config.get(SL_SPOTTER);
+	}
+
+	/**
+	 * Decides whether this engine can process the content item: it needs a
+	 * plain text content part and, if the metadata state a language, that
+	 * language has to be one of {@link #SUPPORTED_LANGUAGES}.
+	 * 
+	 * @param ci
+	 *            the {@link ContentItem}
+	 * @return {@link #ENHANCE_SYNCHRONOUS} if the item can be processed,
+	 *         {@link #CANNOT_ENHANCE} otherwise
+	 */
+	public int canEnhance(ContentItem ci) throws EngineException {
+		if (ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) == null) {
+			return CANNOT_ENHANCE;
+		}
+		String language = getMetadataLanguage(ci.getMetadata(), null);
+		// an unknown language is accepted, only a known unsupported one is not
+		if (language == null || SUPPORTED_LANGUAGES.contains(language)) {
+			return ENHANCE_SYNCHRONOUS;
+		}
+		log.info(
+				"DBpedia Spotlight can not process ContentItem {} because "
+						+ "language {} is not supported (supported: {})",
+				new Object[] { ci.getUri(), language, SUPPORTED_LANGUAGES });
+		return CANNOT_ENHANCE;
+	}
+
+	/**
+	 * Calculate the enhancements by doing a POST request to the DBpedia
+	 * Spotlight endpoint and processing the results. Nothing is added to the
+	 * content item when the request returned no result.
+	 * 
+	 * @param ci
+	 *            the {@link ContentItem}
+	 * @throws EngineException
+	 *             if the text of the content item cannot be read or the
+	 *             response of the service cannot be processed
+	 */
+	public void computeEnhancements(ContentItem ci) throws EngineException {
+		Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(ci,
+				SUPPORTED_MIMTYPES);
+		if (contentPart == null) {
+			throw new IllegalStateException(
+					"No ContentPart with Mimetype '"
+							+ TEXT_PLAIN_MIMETYPE
+							+ "' found for ContentItem "
+							+ ci.getUri()
+							+ ": This is also checked in the canEnhance method! -> This "
+							+ "indicated an Bug in the implementation of the "
+							+ "EnhancementJobManager!");
+		}
+		final String text;
+		try {
+			text = ContentItemHelper.getText(contentPart.getValue());
+		} catch (IOException e) {
+			throw new InvalidContentException(this, ci, e);
+		}
+
+		Collection<SurfaceForm> dbpslGraph = doPostRequest(text);
+		if (dbpslGraph == null) {
+			// the request failed; doPostRequest has already logged the reason
+			return;
+		}
+
+		// Acquire a write lock on the ContentItem when adding the
+		// enhancements
+		ci.getLock().writeLock().lock();
+		try {
+			createEnhancements(dbpslGraph, ci);
+			if (log.isDebugEnabled()) {
+				Serializer serializer = Serializer.getInstance();
+				ByteArrayOutputStream debugStream = new ByteArrayOutputStream();
+				serializer.serialize(debugStream, ci.getMetadata(),
+						"application/rdf+xml");
+				try {
+					log.debug("DBpedia Spotlight Spot Enhancements:\n{}",
+							debugStream.toString("UTF-8"));
+				} catch (UnsupportedEncodingException e) {
+					// only affects the debug output, so report it through the
+					// logger instead of printing to stderr
+					log.warn("Unable to decode the serialized enhancements", e);
+				}
+			}
+		} finally {
+			ci.getLock().writeLock().unlock();
+		}
+	}
+
+	/**
+	 * Writes one TextAnnotation per surface form into the metadata of the
+	 * content item. When several surface forms share the same name, the
+	 * annotation created first for that name gets a dc:relation to every later
+	 * one.
+	 * 
+	 * @param occs
+	 *            a Collection of entity information
+	 * @param ci
+	 *            the content item
+	 */
+	public void createEnhancements(Collection<SurfaceForm> occs,
+			ContentItem ci) {
+		MGraph metadata = ci.getMetadata();
+
+		// language used for plain literals representing parts of the content
+		String langCode = getMetadataLanguage(metadata, null);
+		final Language contentLanguage = (langCode == null || langCode
+				.isEmpty()) ? null : new Language(langCode);
+
+		LiteralFactory literals = LiteralFactory.getInstance();
+		Map<String, UriRef> firstAnnotationByName = new HashMap<String, UriRef>();
+
+		for (SurfaceForm occ : occs) {
+			UriRef annotation = EnhancementEngineHelper.createTextEnhancement(
+					ci, this);
+
+			metadata.add(new TripleImpl(annotation, ENHANCER_SELECTED_TEXT,
+					new PlainLiteralImpl(occ.name, contentLanguage)));
+			metadata.add(new TripleImpl(annotation, ENHANCER_START, literals
+					.createTypedLiteral(occ.offset)));
+			metadata.add(new TripleImpl(annotation, ENHANCER_END, literals
+					.createTypedLiteral(occ.offset + occ.name.length())));
+			metadata.add(new TripleImpl(annotation, DC_TYPE, new UriRef(
+					occ.type)));
+			// TODO ################## model.add(new TripleImpl(textAnnotation,
+			// ENHANCER_SELECTION_CONTEXT, new
+			// PlainLiteralImpl(occ.context,language)));
+
+			UriRef earlier = firstAnnotationByName.get(occ.name);
+			if (earlier == null) {
+				firstAnnotationByName.put(occ.name, annotation);
+			} else {
+				metadata.add(new TripleImpl(earlier, DC_RELATION, annotation));
+			}
+		}
+	}
+
+	/**
+	 * Sends a POST request to the DBpediaSpotlight url.
+	 * 
+	 * @param text
+	 *            a <code>String</code> with the text to be analyzed
+	 * @return a <code>String</code> with the server response
+	 * @throws EngineException
+	 *             if the request cannot be sent
+	 */
+	public Collection<SurfaceForm> doPostRequest(String text)
+			throws EngineException {
+		StringBuilder data = new StringBuilder();
+		try {
+			if (spotlightSpotter != null && !spotlightSpotter.isEmpty())
+				data.append(URLEncoder.encode("spotter", "UTF-8") + "="
+						+ URLEncoder.encode(spotlightSpotter, "UTF-8") + "&");
+			data.append(URLEncoder.encode("text", "UTF-8") + "="
+					+ URLEncoder.encode(text, "UTF-8"));
+		} catch (UnsupportedEncodingException e) {
+			throw new EngineException(
+					"Data for the httprequest could not be converted. Error: "
+							+ e.getMessage());
+		}
+
+		HttpURLConnection connection = null;
+		StringBuffer response = new StringBuffer();
+
+		try {
+			// Create connection
+			URL url = new URL(spotlightUrl);
+			connection = (HttpURLConnection) url.openConnection();
+			connection.setRequestMethod("POST");
+			connection.setRequestProperty("Content-Type",
+					"application/x-www-form-urlencoded");
+			connection.setRequestProperty("Accept", "text/xml");
+
+			connection.setUseCaches(false);
+			connection.setDoInput(true);
+			connection.setDoOutput(true);
+
+			// Send request
+			DataOutputStream wr = new DataOutputStream(
+					connection.getOutputStream());
+			wr.writeBytes(data.toString());
+			wr.flush();
+			wr.close();
+
+			// Get Response
+			InputStream is = connection.getInputStream();
+			BufferedReader rd = new BufferedReader(new InputStreamReader(is));
+			String line;
+			while ((line = rd.readLine()) != null) {
+				response.append(line);
+				response.append('\r');
+			}
+			rd.close();
+
+		} catch (Exception e) {
+
+			log.error("[request] Request could not be made. Error: "
+					+ e.getMessage());
+			e.printStackTrace();
+			return null;
+
+		} finally {
+
+			if (connection != null) {
+				connection.disconnect();
+			}
+		}
+
+		XMLParser xmlParser = new XMLParser();
+		try {
+			Document xmlDoc = xmlParser.loadXMLFromString(response.toString());
+			NodeList nlist = xmlParser.getElementsByTagName(xmlDoc,
+					"surfaceForm");
+			Collection<SurfaceForm> annos = this.getAnnotations(nlist);
+
+			return annos;
+		} catch (Exception e) {
+			log.error("[response] Response XML could not be parsed. Error: "
+					+ e.getMessage());
+			throw new EngineException(
+					"Response XML could not be parsed. Error: "
+							+ e.getMessage());
+		}
+	}
+
+	/**
+	 * This method creates the Collection of surface forms, which the method
+	 * <code>createEnhancement</code> adds to the meta data of the content item
+	 * as TextAnnotations. The surface forms are returned in the order in which
+	 * they appear in the node list.
+	 * 
+	 * @param nList
+	 *            NodeList of all surfaceForm elements contained in the XML
+	 *            response from DBpedia Spotlight
+	 * @return a Collection with one {@link SurfaceForm} per node
+	 * @throws NumberFormatException
+	 *             if the offset attribute of a node is not an integer
+	 */
+	private Collection<SurfaceForm> getAnnotations(NodeList nList) {
+		int count = nList.getLength();
+		// A list keeps the document order. SurfaceForm does not define
+		// equals/hashCode, so the former HashSet never removed duplicates and
+		// only made the iteration order arbitrary.
+		Collection<SurfaceForm> dbpslAnnos = new ArrayList<SurfaceForm>(count);
+
+		for (int i = 0; i < count; i++) {
+			Element node = (Element) nList.item(i);
+			SurfaceForm dbpslann = new SurfaceForm();
+			dbpslann.name = node.getAttribute("name");
+			dbpslann.offset = Integer.valueOf(node.getAttribute("offset"));
+			dbpslann.type = node.getAttribute("type");
+
+			dbpslAnnos.add(dbpslann);
+		}
+
+		return dbpslAnnos;
+	}
+
+	public Map<String, Object> getServiceProperties() {
+		return Collections.unmodifiableMap(Collections.singletonMap(
+				ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder));
+	}
+
+	/**
+	 * Looks up the first dc:language statement for the given subject.
+	 * 
+	 * @param model
+	 *            the graph to search
+	 * @param subj
+	 *            the subject to filter on, <code>null</code> matches any
+	 *            subject
+	 * @return the lexical form of the language value, or <code>null</code> if
+	 *         no such statement exists
+	 */
+	public String getMetadataLanguage(MGraph model, NonLiteral subj) {
+		Iterator<Triple> matches = model.filter(subj, DC_LANGUAGE, null);
+		if (!matches.hasNext()) {
+			return null;
+		}
+		return getLexicalForm(matches.next().getObject());
+	}
+
+	/**
+	 * Converts an RDF resource to a string.
+	 * 
+	 * @param res
+	 *            the resource, may be <code>null</code>
+	 * @return the lexical form for a {@link Literal}, <code>toString()</code>
+	 *         for any other resource, <code>null</code> for <code>null</code>
+	 */
+	public String getLexicalForm(Resource res) {
+		if (res instanceof Literal) {
+			return ((Literal) res).getLexicalForm();
+		}
+		return res == null ? null : res.toString();
+	}
+
+	/**
+	 * Overrides the URL of the Spotlight endpoint. Used by the test class,
+	 * which does not run the OSGi activation.
+	 * 
+	 * @param url
+	 *            String the url of the Spotlight endpoint
+	 */
+	public void setEndpointUrl(String url) {
+		this.spotlightUrl = url;
+	}
+
+}

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/SurfaceForm.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/SurfaceForm.java?rev=1375110&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/SurfaceForm.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/SurfaceForm.java Mon Aug 20 17:14:56 2012
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlight.spot;
+
+//import org.apache.clerezza.rdf.core.Resource;
+
+/**
+ * Stores the surface forms given by DBPedia Spotlight Spot.
+ * 
+ * @author <a href="mailto:iavor.jelev@babelmonkeys.com">Iavor Jelev</a>
+ */
+public class SurfaceForm {
+
+	/** the text of the surface form */
+	public String name;
+	/** the type reported for the surface form */
+	public String type;
+	/** the offset of the surface form within the analysed text */
+	public Integer offset;
+
+	/**
+	 * @return a readable representation of the name, offset and type
+	 */
+	@Override
+	public String toString() {
+		// %d is the integer conversion; the former %i is not a valid
+		// conversion and made String.format throw at runtime
+		return String.format("[name=%s, offset=%d, type=%s]", name, offset,
+				type);
+	}
+}

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/XMLParser.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/XMLParser.java?rev=1375110&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/XMLParser.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/XMLParser.java Mon Aug 20 17:14:56 2012
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlight.spot;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
+
+/**
+ * Parses the XML results given by DBPedia Spotlight.
+ * 
+ * @author <a href="mailto:iavor.jelev@babelmonkeys.com">Iavor Jelev</a>
+ */
+public class XMLParser {
+
+	/**
+	 * @param doc
+	 *            the document to search
+	 * @param tagName
+	 *            the name of the elements to look for
+	 * @return all elements of the document with the given tag name
+	 */
+	public NodeList getElementsByTagName(Document doc, String tagName) {
+
+		return doc.getElementsByTagName(tagName);
+	}
+
+	/**
+	 * Parses an XML document held in a String.
+	 * 
+	 * @param xml
+	 *            the XML document
+	 * @return the parsed and normalized document
+	 * @throws SAXException
+	 *             if the XML is not well-formed
+	 * @throws IOException
+	 *             if the data cannot be read
+	 */
+	public Document loadXMLFromString(String xml) throws SAXException,
+			IOException {
+		// NOTE(review): getBytes() encodes with the platform default charset.
+		// That only reproduces the bytes of the original document if the
+		// caller decoded it with the same charset; prefer
+		// loadXMLFromInputStream for raw response bytes.
+		// loadXMLFromInputStream already normalizes the document.
+		return loadXMLFromInputStream(new ByteArrayInputStream(xml.getBytes()));
+	}
+
+	/**
+	 * Parses an XML document from a stream with a namespace aware parser. The
+	 * stream is always closed, also when parsing fails.
+	 * 
+	 * @param is
+	 *            the stream to read the XML from
+	 * @return the parsed and normalized document
+	 * @throws SAXException
+	 *             if the XML is not well-formed
+	 * @throws IOException
+	 *             if the stream cannot be read
+	 * @throws IllegalStateException
+	 *             if no XML parser can be created
+	 */
+	public Document loadXMLFromInputStream(InputStream is) throws SAXException,
+			IOException {
+		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+		factory.setNamespaceAware(true);
+		try {
+			DocumentBuilder builder = factory.newDocumentBuilder();
+			Document doc = builder.parse(is);
+			doc.getDocumentElement().normalize();
+			return doc;
+		} catch (ParserConfigurationException ex) {
+			// formerly swallowed, which ended in a NullPointerException on
+			// the missing builder and hid the real cause
+			throw new IllegalStateException(
+					"Unable to create an XML DocumentBuilder", ex);
+		} finally {
+			if (is != null) {
+				is.close();
+			}
+		}
+	}
+
+	/**
+	 * Parses an XML document from a file. Unlike
+	 * {@link #loadXMLFromInputStream(InputStream)} the parser used here is not
+	 * namespace aware.
+	 * 
+	 * @param filePath
+	 *            the path of the XML file
+	 * @return the parsed and normalized document
+	 * @throws ParserConfigurationException
+	 *             if no XML parser can be created
+	 * @throws SAXException
+	 *             if the XML is not well-formed
+	 * @throws IOException
+	 *             if the file cannot be read
+	 */
+	public Document loadXMLFromFile(String filePath)
+			throws ParserConfigurationException, SAXException, IOException {
+		File fXmlFile = new File(filePath);
+		DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
+		DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
+		Document doc = dBuilder.parse(fXmlFile);
+		doc.getDocumentElement().normalize();
+
+		return doc;
+	}
+}
\ No newline at end of file

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/core/DBPSpotlightSpotEnhancementTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/core/DBPSpotlightSpotEnhancementTest.java?rev=1375110&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/core/DBPSpotlightSpotEnhancementTest.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight-spot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlight/spot/core/DBPSpotlightSpotEnhancementTest.java Mon Aug 20 17:14:56 2012
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlight.spot.core;
+
+import java.util.Collection;
+
+import org.apache.stanbol.enhancer.engines.dbpspotlight.spot.DBPSpotlightSpotEnhancementEngine;
+import org.apache.stanbol.enhancer.engines.dbpspotlight.spot.SurfaceForm;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.osgi.service.cm.ConfigurationException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This class provides a JUnit test for DBpedia Spotlight Spot
+ * EnhancementEngine. The test sends a request to the endpoint given by the
+ * system property {@link DBPSpotlightSpotEnhancementEngine#SL_URL_KEY}, or to
+ * the public DBpedia Spotlight service if that property is not set.
+ * 
+ * @author Iavor Jelev, babelmonkeys / GzEvD
+ */
+public class DBPSpotlightSpotEnhancementTest {
+
+	/**
+	 * This contains the logger.
+	 */
+	private static final Logger LOG = LoggerFactory
+			.getLogger(DBPSpotlightSpotEnhancementTest.class);
+	/** the endpoint used by the test */
+	private static final String SPL_URL = System.getProperty(
+			DBPSpotlightSpotEnhancementEngine.SL_URL_KEY,
+			"http://spotlight.dbpedia.org/rest/spot");
+	private static final String TEST_TEXT = "President Obama is meeting Angela Merkel in Berlin on Monday";
+	private static DBPSpotlightSpotEnhancementEngine dbpslight;
+
+	@BeforeClass
+	public static void oneTimeSetup() throws ConfigurationException {
+		dbpslight = new DBPSpotlightSpotEnhancementEngine();
+		dbpslight.setEndpointUrl(SPL_URL);
+	}
+
+	/**
+	 * Checks that the service finds at least one surface form in the test
+	 * text. An EngineException is not caught, so that JUnit reports it
+	 * together with its stack trace.
+	 */
+	@Test
+	public void testEntityExtraction() throws EngineException {
+		Collection<SurfaceForm> entities = dbpslight.doPostRequest(TEST_TEXT);
+		// doPostRequest returns null when the request itself failed
+		Assert.assertNotNull("The request to " + SPL_URL
+				+ " did not return a result!", entities);
+		LOG.info("Found entities: {}", entities.size());
+		LOG.debug("Entities:\n{}", entities);
+		Assert.assertFalse("No entities were found!", entities.isEmpty());
+	}
+
+}

Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/pom.xml?rev=1375110&r1=1375109&r2=1375110&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/pom.xml (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/pom.xml Mon Aug 20 17:14:56 2012
@@ -57,9 +57,9 @@
     <module>opencalais</module> <!-- http://opencalais.com/ -->
     <module>zemanta</module> <!-- htt://zemanta.com -->
     <!-- DBpedia.org Spotlight Enhancement Engines (STANBOL-706) -->
-    <module>dbpspotlightannotate</module>
-    <module>dbpspotlightcandidates</module>
-    <module>dbpspotlightdisambiguate</module>
-    <module>dbpspotlightspot</module>
+    <module>dbpedia-spotlight-annotate</module>
+    <module>dbpedia-spotlight-candidates</module>
+    <module>dbpedia-spotlight-disambiguate</module>
+    <module>dbpedia-spotlight-spot</module>
   </modules>
 </project>