You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/08/20 14:11:05 UTC

svn commit: r1374984 [3/3] - in /incubator/stanbol/branches/dbpedia-spotlight-engines/engines: dbpspotlightannotate/ dbpspotlightannotate/src/ dbpspotlightannotate/src/license/ dbpspotlightannotate/src/main/ dbpspotlightannotate/src/main/java/ dbpspotl...

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/pom.xml?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/pom.xml (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/pom.xml Mon Aug 20 12:11:01 2012
@@ -0,0 +1,121 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <artifactId>org.apache.stanbol.enhancer.parent</artifactId>
+    <groupId>org.apache.stanbol</groupId>
+    <version>0.9.0-incubating</version>
+    <relativePath>../../parent</relativePath>
+  </parent>
+
+  <groupId>org.apache.stanbol</groupId>
+  <artifactId>org.apache.stanbol.enhancer.engines.dbpspotlightspot</artifactId>
+  <packaging>bundle</packaging>
+
+  <name>Apache Stanbol Enhancer Enhancement Engine : DBPedia Spotlight Spot</name>
+  <description>an enhancement engine for spotting</description>
+
+  <inceptionYear>2010</inceptionYear>
+
+  <!--scm>
+    <connection>
+      scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
+    </connection>
+    <developerConnection>
+      scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
+    </developerConnection>
+    <url>http://incubator.apache.org/stanbol/</url>
+  </scm-->
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-bundle-plugin</artifactId>
+        <extensions>true</extensions>
+        <configuration>
+          <instructions>
+            <Export-Package>
+              org.apache.stanbol.enhancer.engines.dbpspotlightspot;version=${project.version}
+            </Export-Package>
+            <Embed-Dependency>
+            </Embed-Dependency>
+          </instructions>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-scr-plugin</artifactId>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.rat</groupId>
+        <artifactId>apache-rat-plugin</artifactId>
+        <configuration>
+          <excludes>
+            <!-- AL20 licensed files: See src/test/resources/README -->
+            <exclude>src/test/resources/en.txt</exclude>
+          </excludes>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-core</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.felix</groupId>
+      <artifactId>org.apache.felix.scr.annotations</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.clerezza</groupId>
+      <artifactId>rdf.core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+</project>

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/license/THIRD-PARTY.properties?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/license/THIRD-PARTY.properties (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/license/THIRD-PARTY.properties Mon Aug 20 12:11:01 2012
@@ -0,0 +1,17 @@
+# Generated by org.codehaus.mojo.license.AddThirdPartyMojo
+#-------------------------------------------------------------------------------
+# Already used licenses in project :
+# - Apache License
+# - Common Development and Distribution License (CDDL) v1.0
+# - Common Public License Version 1.0
+# - ICU License
+# - MIT License
+# - The Apache Software License, Version 2.0
+#-------------------------------------------------------------------------------
+# Please fill the missing licenses for dependencies :
+#
+#
+#Wed Feb 15 19:06:13 CET 2012
+javax.servlet--servlet-api--2.4=Common Development And Distribution License (CDDL), Version 1.0
+org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
+org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/DBPSLSurfaceForm.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/DBPSLSurfaceForm.java?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/DBPSLSurfaceForm.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/DBPSLSurfaceForm.java Mon Aug 20 12:11:01 2012
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlightspot;
+
+//import org.apache.clerezza.rdf.core.Resource;
+
+/**
+ * Stores the surface forms given by DBPedia Spotlight Spot.
+ *
+ * @author <a href="mailto:iavor.jelev@babelmonkeys.com">Iavor Jelev</a>
+ */
+public class DBPSLSurfaceForm {
+
+	public String name;
+	public String type;
+	public Integer offset;
+
+	public String toString() {
+		return String.format( "[name=%s, offset=%i, type=%s]", name, offset, type ) ;
+	}
+}

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/DBPSpotlightSpotEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/DBPSpotlightSpotEnhancementEngine.java?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/DBPSpotlightSpotEnhancementEngine.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/DBPSpotlightSpotEnhancementEngine.java Mon Aug 20 12:11:01 2012
@@ -0,0 +1,393 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlightspot;
+
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Dictionary;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.Language;
+import org.apache.clerezza.rdf.core.Literal;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.core.serializedform.Serializer;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import org.apache.stanbol.enhancer.servicesapi.helper.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+
+/**
+ * {@link DBPSpotlightSpotEnhancementEngine} provides functionality to enhance document
+ * with their language.
+ *
+ * @author Iavor Jelev, Babelmonkeys (GzEvD)
+ */
+@Component(
+    metatype = true, 
+    immediate = true,
+    label = "%stanbol.DBPSpotlightSpotEnhancementEngine.name", 
+    description = "%stanbol.DBPSpotlightSpotEnhancementEngine.description")
+@Service
+@Properties(value={
+    @Property(name=EnhancementEngine.PROPERTY_NAME,value="dbpspotlightspot")
+})
+public class DBPSpotlightSpotEnhancementEngine 
+        extends AbstractEnhancementEngine<IOException,RuntimeException>
+        implements EnhancementEngine, ServiceProperties {
+
+    /**
+     * a configurable value of the text segment length to check
+     */
+    @Property(value = "http://spotlight.dbpedia.org/rest/spot")
+    public static final String SL_URL_KEY = "stanbol.DBPSpotlightSpotEnhancementEngine.url";
+    
+    @Property(value = "LingPipeSpotter")
+    public static final String SL_SPOTTER = "stanbol.DBPSpotlightSpotEnhancementEngine.spotter";
+
+
+    /** 
+     * The default value for the Execution of this Engine. Currently set to
+     * {@link ServiceProperties#ORDERING_PRE_PROCESSING} 
+     */
+    public static final Integer defaultOrder = ORDERING_CONTENT_EXTRACTION - 29;
+
+    /**
+     * This contains the only MIME type directly supported by this enhancement engine.
+     */
+    private static final String TEXT_PLAIN_MIMETYPE = "text/plain";
+    /**
+     * Set containing the only supported mime type {@link #TEXT_PLAIN_MIMETYPE}
+     */
+    private static final Set<String> SUPPORTED_MIMTYPES = Collections.singleton(TEXT_PLAIN_MIMETYPE);
+
+    /**
+     * This contains a list of languages supported by DBpedia Spotlight.
+     * If the metadata doesn't contain a value for the language as the value of the {@link Property.DC_LANG property}
+     * the content can't be processed.
+     */
+    protected static final Set<String> SUPPORTED_LANGUAGES = 
+            Collections.unmodifiableSet(new HashSet<String>(
+                    Arrays.asList("en")));
+
+    /** holds the logger. */
+    private static final Logger log = LoggerFactory.getLogger(DBPSpotlightSpotEnhancementEngine.class);
+
+    /** holds the url of the Spotlight REST endpoint */
+    private String spotlightUrl;
+    /** holds the chosen of spotter to be used */
+    private String spotlightSpotter;
+
+
+
+    /**
+     * Initialize all parameters from the configuration panel, or with their default values
+     * @param ce  the {@link ComponentContext}
+     */
+    @SuppressWarnings("unchecked")
+	protected void activate(ComponentContext ce) throws ConfigurationException, IOException {
+        
+        super.activate(ce);
+        
+        Dictionary<String, Object> properties = ce.getProperties();
+        spotlightUrl                          = properties.get( SL_URL_KEY ) == null ? "http://spotlight.dbpedia.org/rest/spot" : (String) properties.get( SL_URL_KEY );
+        spotlightSpotter                      = properties.get( SL_SPOTTER ) == null ? null : (String) properties.get( SL_SPOTTER );
+    }   
+    
+
+    /**
+     * Check if the content can be enhanced
+     * @param ci  the {@link ContentItem}
+     */
+    public int canEnhance(ContentItem ci) throws EngineException {
+        if(ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) != null){
+            String language = getMetadataLanguage(ci.getMetadata(), null);
+            if (language != null && !SUPPORTED_LANGUAGES.contains(language)) {
+                log.info("DBpedia Spotlight can not process ContentItem {} because "
+                    + "language {} is not supported (supported: {})",
+                    new Object[]{ci.getUri(),language,SUPPORTED_LANGUAGES});
+                return CANNOT_ENHANCE;
+            }
+            return ENHANCE_SYNCHRONOUS;
+        } 
+        return CANNOT_ENHANCE;
+    }
+
+
+    /**
+     * Calculate the enhancements by doing a POST request to the DBpedia Spotlight endpoint and processing the results 
+     * @param ci  the {@link ContentItem}
+     */
+    public void computeEnhancements( ContentItem ci ) throws EngineException {
+        Entry<UriRef,Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
+        if(contentPart == null){
+            throw new IllegalStateException("No ContentPart with Mimetype '"
+                    + TEXT_PLAIN_MIMETYPE+"' found for ContentItem "+ci.getUri()
+                    + ": This is also checked in the canEnhance method! -> This "
+                    + "indicated an Bug in the implementation of the "
+                    + "EnhancementJobManager!");
+        }
+        String text = "";
+        try {
+            text = ContentItemHelper.getText(contentPart.getValue());
+        } catch (IOException e) {
+            throw new InvalidContentException(this, ci, e);
+        }
+
+        Collection<DBPSLSurfaceForm> dbpslGraph = doPostRequest( text );
+        if ( dbpslGraph != null ) {
+            //Acquire a write lock on the ContentItem when adding the enhancements
+            ci.getLock().writeLock().lock();
+            try {
+                createEnhancements( dbpslGraph, ci);
+                if (log.isDebugEnabled()) {
+                    Serializer serializer = Serializer.getInstance();
+                    ByteArrayOutputStream debugStream = new ByteArrayOutputStream();
+                    serializer.serialize(debugStream, ci.getMetadata(), "application/rdf+xml");
+                    try {
+                        log.debug("DBpedia Spotlight Spot Enhancements:\n{}",debugStream.toString("UTF-8"));
+                    } catch (UnsupportedEncodingException e) {
+                        e.printStackTrace();
+                    }
+                }
+            } finally {
+                ci.getLock().writeLock().unlock();
+            }
+        }
+    }
+
+
+    /**
+     * The method adds the returned DBpedia Spotlight surface forms to the content item's metadata.
+     * For each one an TextAnnotation is created.
+     *
+     * @param occs a Collection of entity information
+     * @param ci the content item
+     */
+    public void createEnhancements( Collection<DBPSLSurfaceForm> occs, ContentItem ci ) {
+        LiteralFactory literalFactory = LiteralFactory.getInstance();
+        final Language language;       // used for plain literals representing parts fo the content
+        String langString             = getMetadataLanguage(ci.getMetadata(), null);
+        
+        if(langString != null && !langString.isEmpty()){
+            language = new Language(langString);
+        } else {
+            language = null;
+        }
+        
+        HashMap<String, UriRef> entityAnnotationMap = new HashMap<String, UriRef>();
+
+        for (DBPSLSurfaceForm occ : occs) {
+            UriRef textAnnotation = EnhancementEngineHelper.createTextEnhancement( ci, this );
+            MGraph model          = ci.getMetadata();
+
+            model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,new PlainLiteralImpl(occ.name,language)));
+            model.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(occ.offset)));
+            model.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(occ.offset + occ.name.length())));
+            model.add(new TripleImpl(textAnnotation, DC_TYPE, new UriRef( occ.type )));
+            // TODO ################## model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(occ.context,language)));
+
+            if (entityAnnotationMap.containsKey(occ.name)) {
+                model.add(new TripleImpl(entityAnnotationMap.get(occ.name), DC_RELATION, textAnnotation));
+            } 
+            else {
+                entityAnnotationMap.put(occ.name,textAnnotation);
+            }
+        }
+    }
+
+
+
+
+    /**
+     * Sends a POST request to the DBpediaSpotlight url.
+     * @param text a <code>String</code> with the text to be analyzed
+     * @return a <code>String</code> with the server response
+     * @throws EngineException if the request cannot be sent
+     */
+    public Collection<DBPSLSurfaceForm> doPostRequest( String text ) throws EngineException {
+		StringBuilder data = new StringBuilder();
+		try {
+			if ( spotlightSpotter != null && !spotlightSpotter.isEmpty() )
+    			data.append( URLEncoder.encode( "spotter", "UTF-8" ) + "=" + URLEncoder.encode( spotlightSpotter, "UTF-8" ) + "&" );
+			data.append( URLEncoder.encode( "text", "UTF-8" ) + "=" + URLEncoder.encode( text, "UTF-8" ) );
+		} catch (UnsupportedEncodingException e) {
+			throw new EngineException( "Data for the httprequest could not be converted. Error: " + e.getMessage() );
+		}
+
+		HttpURLConnection connection = null;  
+		StringBuffer response        = new StringBuffer(); 
+		
+        try {
+			//Create connection
+			URL url = new URL( spotlightUrl );
+			connection = ( HttpURLConnection )url.openConnection();
+			connection.setRequestMethod( "POST" );
+			connection.setRequestProperty( "Content-Type", "application/x-www-form-urlencoded" );
+			connection.setRequestProperty( "Accept", "text/xml" );
+
+			connection.setUseCaches( false );
+			connection.setDoInput( true );
+			connection.setDoOutput( true );
+
+			//Send request
+			DataOutputStream wr = new DataOutputStream (
+					connection.getOutputStream ());
+			wr.writeBytes( data.toString() );
+			wr.flush ();
+			wr.close ();
+
+			//Get Response	
+			InputStream is = connection.getInputStream();
+			BufferedReader rd = new BufferedReader( new InputStreamReader( is ) );
+			String line;
+			while((line = rd.readLine()) != null) {
+				response.append( line );
+				response.append( '\r' );
+			}
+			rd.close();
+
+		} catch (Exception e) {
+
+		    log.error( "[request] Request could not be made. Error: " + e.getMessage() );
+			e.printStackTrace();
+			return null;
+
+		} finally {
+
+			if(connection != null) {
+				connection.disconnect(); 
+			}
+		}
+		
+
+	    XMLParser xmlParser = new XMLParser();
+		try {
+		    Document xmlDoc                   = xmlParser.loadXMLFromString( response.toString() );
+			NodeList nlist                    = xmlParser.getElementsByTagName( xmlDoc, "surfaceForm" );
+			Collection<DBPSLSurfaceForm> annos = this.getAnnotations( nlist );
+        
+            return annos;
+   		} catch ( Exception e) {
+		    log.error( "[response] Response XML could not be parsed. Error: " + e.getMessage() );
+			throw new EngineException( "Response XML could not be parsed. Error: " + e.getMessage() );
+		}
+    }
+    
+    
+    /**
+     * This method creates the Collection of surface forms, which the method <code>createEnhancement</code>
+     * adds to the meta data of the content item as TextAnnotations.
+     * @param nList NodeList of all Resources contained in the XML response from DBpedia Spotlight
+     * @return a Collection<DBPSLSurfaceForm> with all annotations
+     */
+	private Collection<DBPSLSurfaceForm> getAnnotations( NodeList nList ) {
+		Collection<DBPSLSurfaceForm> dbpslAnnos = new HashSet<DBPSLSurfaceForm>();
+		
+		for (int temp = 0; temp < nList.getLength(); temp++) {
+			DBPSLSurfaceForm dbpslann        = new DBPSLSurfaceForm();
+			Element node                     = (Element) nList.item(temp);
+			dbpslann.name                    = node.getAttribute( "name" );
+			dbpslann.offset                  = (new Integer( node.getAttribute( "offset" ) ) ).intValue();
+			dbpslann.type                    = node.getAttribute( "type" );
+			
+			dbpslAnnos.add( dbpslann );
+		}
+		
+		return dbpslAnnos;
+	}
+
+
+    public Map<String, Object> getServiceProperties() {
+        return Collections.unmodifiableMap(Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder));
+    }
+
+
+    public String getMetadataLanguage(MGraph model, NonLiteral subj) {
+        Iterator<Triple> it = model.filter(subj, DC_LANGUAGE, null);
+        if (it.hasNext()) {
+            Resource langNode = it.next().getObject();
+            return getLexicalForm(langNode);
+        }
+        return null;
+    }
+
+    public String getLexicalForm(Resource res) {
+        if (res == null) {
+            return null;
+        } else if (res instanceof Literal) {
+            return ((Literal) res).getLexicalForm();
+        } else {
+            return res.toString();
+        }
+    }
+
+    
+    /**
+     * This method is used by the test class to set the endpoint url
+     * @param url String the url of the Spotlight endpoint
+     */
+    public void setEndpointUrl( String url ) {
+    	spotlightUrl = url;
+    }
+
+}

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/XMLParser.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/XMLParser.java?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/XMLParser.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/XMLParser.java Mon Aug 20 12:11:01 2012
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlightspot;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
+
+
+/**
+ * Parses the XML results given by DBPedia Spotlight.
+ *
+ * @author <a href="mailto:iavor.jelev@babelmonkeys.com">Iavor Jelev</a>
+ */
+
+public class XMLParser {
+ 
+	public NodeList getElementsByTagName( Document doc, String tagName ) {
+		
+		return doc.getElementsByTagName( tagName );
+	}
+	
+
+	public Document loadXMLFromString( String xml ) throws SAXException, IOException {
+		Document doc = loadXMLFromInputStream( new ByteArrayInputStream( xml.getBytes() ) );
+		doc.getDocumentElement().normalize();
+
+		return doc;
+	}
+
+	
+	public Document loadXMLFromInputStream( InputStream is ) throws SAXException, IOException {
+		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+		factory.setNamespaceAware( true );
+		DocumentBuilder builder        = null;
+		try {
+			builder = factory.newDocumentBuilder();
+		}
+		catch ( ParserConfigurationException ex ) {
+		}  
+		Document doc = builder.parse(is);
+		is.close();
+		doc.getDocumentElement().normalize();
+
+		return doc;
+	}
+
+
+	public Document loadXMLFromFile( String filePath ) throws ParserConfigurationException, SAXException, IOException {
+		File fXmlFile = new File( filePath );
+		DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
+		DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
+		Document doc = dBuilder.parse(fXmlFile);
+		doc.getDocumentElement().normalize();
+		
+		return doc;
+	}
+}
\ No newline at end of file

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/resources/OSGI-INF/metatype/metatype.properties (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/resources/OSGI-INF/metatype/metatype.properties Mon Aug 20 12:11:01 2012
@@ -0,0 +1,32 @@
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing,
+#  software distributed under the License is distributed on an
+#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#  KIND, either express or implied.  See the License for the
+#  specific language governing permissions and limitations
+#  under the License.
+
+
+
+# This file contains localization strings for configuration labels and
+# descriptions as used in the metatype.xml descriptor generated by the
+# the maven SCR plugin
+
+stanbol.DBPSpotlightSpotEnhancementEngine.name = DBpedia Spotlight Spotter: Named Entity Recognition
+stanbol.DBPSpotlightSpotEnhancementEngine.description = This engine performs just Named Entity Recognition, \
+    so it is suited for EnhancementChain scenario, in which another Engine links the recognized TextAnnotations \
+    to Ontology Types
+stanbol.DBPSpotlightSpotEnhancementEngine.url.name = Spotlight URL
+stanbol.DBPSpotlightSpotEnhancementEngine.url.description = The URL which will be used for the request
+stanbol.DBPSpotlightSpotEnhancementEngine.spotter.name = Spotter
+stanbol.DBPSpotlightSpotEnhancementEngine.spotter.description = The algorithm which will be used for Spotting \
+    (aka Term Recognition). Currently available: NER, LingPipeSpotter, OpenNLPChunkerSpotter, Kea

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/core/DBPSpotlightSpotEnhancementTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/core/DBPSpotlightSpotEnhancementTest.java?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/core/DBPSpotlightSpotEnhancementTest.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/core/DBPSpotlightSpotEnhancementTest.java Mon Aug 20 12:11:01 2012
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlightspot.core;
+
+import java.util.Collection;
+
+import org.apache.stanbol.enhancer.engines.dbpspotlightspot.DBPSLSurfaceForm;
+import org.apache.stanbol.enhancer.engines.dbpspotlightspot.DBPSpotlightSpotEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.osgi.service.cm.ConfigurationException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This class provides a JUnit test for DBpedia Spotlight Spot EnhancementEngine.
+ * @author Iavor Jelev, babelmonkeys / GzEvD
+ */
+public class DBPSpotlightSpotEnhancementTest {
+
+	  /**
+	   * This contains the logger.
+	   */
+	  private static final Logger LOG  = LoggerFactory.getLogger(DBPSpotlightSpotEnhancementTest.class);
+	  private static String SPL_URL    = System.getProperty(DBPSpotlightSpotEnhancementEngine.SL_URL_KEY) == null ? 
+			  											"http://spotlight.dbpedia.org/rest/spot" : (String) System.getProperty(DBPSpotlightSpotEnhancementEngine.SL_URL_KEY);
+	  private static String TEST_TEXT  = "President Obama is meeting Angela Merkel in Berlin on Monday";
+	  private static DBPSpotlightSpotEnhancementEngine dbpslight;
+	  
+	  @BeforeClass
+	  public static void oneTimeSetup() throws ConfigurationException {
+		  dbpslight = new DBPSpotlightSpotEnhancementEngine();
+		  dbpslight.setEndpointUrl( SPL_URL );
+	  }
+
+	  
+	  @Test
+	  public void testEntityExtraction() {
+	    Collection<DBPSLSurfaceForm> entities;
+		try {
+			entities = dbpslight.doPostRequest( TEST_TEXT );
+		    LOG.info("Found entities: {}",entities.size());
+		    LOG.debug("Entities:\n{}",entities);
+		    Assert.assertFalse("No entities were found!", entities.isEmpty());
+		} catch (EngineException e) {
+		    Assert.assertFalse("An EngineException occurred! The message was: " + e.getMessage(), true);
+		}
+	  }
+
+}

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/resources/README
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/resources/README?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/resources/README (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/resources/README Mon Aug 20 12:11:01 2012
@@ -0,0 +1,15 @@
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+