You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/08/20 14:11:05 UTC

svn commit: r1374984 [1/3] - in /incubator/stanbol/branches/dbpedia-spotlight-engines/engines: dbpspotlightannotate/ dbpspotlightannotate/src/ dbpspotlightannotate/src/license/ dbpspotlightannotate/src/main/ dbpspotlightannotate/src/main/java/ dbpspotl...

Author: rwesten
Date: Mon Aug 20 12:11:01 2012
New Revision: 1374984

URL: http://svn.apache.org/viewvc?rev=1374984&view=rev
Log:
STANBOL-704: Applied the provided patch with a minor correction of a package name of a unit test for the disambiguation engine

Added:
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/   (with props)
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/README.md
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/pom.xml
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/license/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/license/THIRD-PARTY.properties
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSLAnnotation.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSpotlightAnnotateEnhancementEngine.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/XMLParser.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/resources/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/resources/OSGI-INF/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/resources/OSGI-INF/metatype/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/resources/OSGI-INF/metatype/metatype.properties
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/enhancer/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/enhancer/engines/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/core/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/core/DBPSpotlightAnnotateEnhancementTest.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/resources/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/resources/README
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/   (with props)
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/README.md
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/pom.xml
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/license/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/license/THIRD-PARTY.properties
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/CandidateResource.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/DBPSLSurfaceForm.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/DBPSpotlightCandidatesEnhancementEngine.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/XMLParser.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/resources/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/resources/OSGI-INF/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/resources/OSGI-INF/metatype/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/resources/OSGI-INF/metatype/metatype.properties
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/java/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/java/org/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/java/org/apache/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/java/org/apache/stanbol/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/java/org/apache/stanbol/enhancer/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/java/org/apache/stanbol/enhancer/engines/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/core/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/core/DBPSpotlightCandidatesEnhancementTest.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/resources/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/resources/README
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/   (with props)
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/README.md
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/pom.xml
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/license/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/license/THIRD-PARTY.properties
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/java/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/java/org/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/java/org/apache/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/java/org/apache/stanbol/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/java/org/apache/stanbol/enhancer/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/java/org/apache/stanbol/enhancer/engines/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightdisambiguate/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightdisambiguate/DBPSLAnnotation.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightdisambiguate/DBPSpotlightDisambiguateEnhancementEngine.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightdisambiguate/XMLParser.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/resources/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/resources/OSGI-INF/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/resources/OSGI-INF/metatype/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/resources/OSGI-INF/metatype/metatype.properties
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/java/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/java/org/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/java/org/apache/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/java/org/apache/stanbol/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/java/org/apache/stanbol/enhancer/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/java/org/apache/stanbol/enhancer/engines/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightdisambiguate/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightdisambiguate/core/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightdisambiguate/core/DBPSpotlightDisambiguateEnhancementTest.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/resources/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/resources/README
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/resources/spots.xml
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/   (with props)
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/README.md
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/pom.xml
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/license/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/license/THIRD-PARTY.properties
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/DBPSLSurfaceForm.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/DBPSpotlightSpotEnhancementEngine.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/XMLParser.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/resources/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/resources/OSGI-INF/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/resources/OSGI-INF/metatype/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/resources/OSGI-INF/metatype/metatype.properties
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/apache/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/apache/stanbol/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/apache/stanbol/enhancer/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/apache/stanbol/enhancer/engines/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/core/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/core/DBPSpotlightSpotEnhancementTest.java
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/resources/
    incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/resources/README

Propchange: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Aug 20 12:11:01 2012
@@ -0,0 +1,7 @@
+.classpath
+
+.settings
+
+target
+
+.project

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/README.md
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/README.md?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/README.md (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/README.md Mon Aug 20 12:11:01 2012
@@ -0,0 +1,96 @@
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+# LangId: Language Identification Enhancement Engine
+
+The **LangId** engine determines the language of text. 
+
+## Technical Description
+
+The provided engine is based on the language identifier of [Apache Tika](http://tika.apache.org/).
+The text to be checked must be provided in plain text format in one of two forms:
+
+* a plain text content item
+* by the content item's metadata as the string value of the property 
+    
+    <pre><code>http://www.semanticdesktop.org/ontologies/2007/01/19/nie#plainTextContent</code></pre>
+
+The result of language identification is added as TextAnnotation to the content item's metadata as string value of the property
+
+    http://purl.org/dc/terms/language
+
+This RDF snippet illustrates the output:
+
+    <fise:TextAnnotation rdf:about="urn:enhancement-a147957b-41f9-58f7-bbf1-b880b3aa4b49">
+        <dc:language>en</dc:language>
+        <dc:creator>org.apache.stanbol.enhancer.engines.langid.LangIdEnhancementEngine</dc:creator>
+    </fise:TextAnnotation>
+
+
+By default the language identifier distinguishes the languages listed below. After the colon the value of the language label in the metadata is given.
+
+* German: de
+* English: en
+* Estonian: et
+* French: fr
+* Spanish: es
+* Italian: it
+* Swedish: sv
+* Polish: pl
+* Dutch: nl
+* Norwegian: no
+* Finnish: fi
+* Greek: el
+* Danish: da
+* Hungarian: hu
+* Icelandic: is
+* Lithuanian: lt
+* Portuguese: pt
+* Russian: ru
+* Thai: th
+
+Additional language models can be created as Tika [LanguageProfile](org.apache.tika.language.LanguageProfile).
+
+## Configuration options
+
+* <pre><code>org.apache.stanbol.enhancer.engines.langid.probe-length</code></pre>
+
+    an integer specifying how many characters will be used for
+    identification. A value of 0 or below means to use the complete
+    text. Otherwise only a substring of the specified length taken from the
+    middle of the text will be used. The default value is 400 characters.
+
+## Usage
+
+Assuming that the Stanbol endpoint with the full launcher is running at
+
+    http://localhost:8080
+
+and the engine is activated, commands like the following can be used
+from the command line to submit a text file as a content item:
+
+* stateless interface
+
+    curl -i -X POST -H "Content-Type:text/plain" -T testfile.txt http://localhost:8080/engines
+
+* stateful interface
+
+    curl -i -X PUT -H "Content-Type:text/plain" -T testfile.txt http://localhost:8080/contenthub/content/someFileId
+
+Alternatively, the Stanbol web interface can be used for submitting documents
+and viewing the metadata at
+
+    http://localhost:8080/contenthub
+

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/pom.xml?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/pom.xml (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/pom.xml Mon Aug 20 12:11:01 2012
@@ -0,0 +1,122 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <artifactId>org.apache.stanbol.enhancer.parent</artifactId>
+    <groupId>org.apache.stanbol</groupId>
+    <version>0.9.0-incubating</version>
+    <relativePath>../../parent</relativePath>
+  </parent>
+
+  <groupId>org.apache.stanbol</groupId>
+  <artifactId>org.apache.stanbol.enhancer.engines.dbpspotlightannotate</artifactId>
+  <packaging>bundle</packaging>
+
+  <name>Apache Stanbol Enhancer Enhancement Engine : DBPedia Spotlight Annotate</name>
+  <description>just tests the Stanbol Engine Import
+  </description>
+
+  <inceptionYear>2010</inceptionYear>
+
+  <!--scm>
+    <connection>
+      scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
+    </connection>
+    <developerConnection>
+      scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
+    </developerConnection>
+    <url>http://incubator.apache.org/stanbol/</url>
+  </scm-->
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-bundle-plugin</artifactId>
+        <extensions>true</extensions>
+        <configuration>
+          <instructions>
+            <Export-Package>
+              org.apache.stanbol.enhancer.engines.dbpspotlightannotate;version=${project.version}
+            </Export-Package>
+            <Embed-Dependency>
+            </Embed-Dependency>
+          </instructions>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-scr-plugin</artifactId>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.rat</groupId>
+        <artifactId>apache-rat-plugin</artifactId>
+        <configuration>
+          <excludes>
+            <!-- AL20 licensed files: See src/test/resources/README -->
+            <exclude>src/test/resources/en.txt</exclude>
+          </excludes>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-core</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.felix</groupId>
+      <artifactId>org.apache.felix.scr.annotations</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.clerezza</groupId>
+      <artifactId>rdf.core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+</project>

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/license/THIRD-PARTY.properties?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/license/THIRD-PARTY.properties (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/license/THIRD-PARTY.properties Mon Aug 20 12:11:01 2012
@@ -0,0 +1,17 @@
+# Generated by org.codehaus.mojo.license.AddThirdPartyMojo
+#-------------------------------------------------------------------------------
+# Already used licenses in project :
+# - Apache License
+# - Common Development and Distribution License (CDDL) v1.0
+# - Common Public License Version 1.0
+# - ICU License
+# - MIT License
+# - The Apache Software License, Version 2.0
+#-------------------------------------------------------------------------------
+# Please fill the missing licenses for dependencies :
+#
+#
+#Wed Feb 15 19:06:13 CET 2012
+javax.servlet--servlet-api--2.4=Common Development And Distribution License (CDDL), Version 1.0
+org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
+org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSLAnnotation.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSLAnnotation.java?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSLAnnotation.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSLAnnotation.java Mon Aug 20 12:11:01 2012
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlightannotate;
+
+import java.util.HashSet;
+
+import org.apache.clerezza.rdf.core.Resource;
+
+/**
+ * Contains a result given by DBPedia Spotlight.
+ *
+ * <p>This is a plain mutable holder: every field is public, is assigned
+ * directly by the code that builds the annotation, and may be {@code null}
+ * until it has been populated.
+ *
+ * @author <a href="mailto:iavor.jelev@babelmonkeys.com">Iavor Jelev</a>
+ */
+public class DBPSLAnnotation {
+
+    public Resource uri;
+    /** Comma-separated list of type names; see {@link #getTypeNames()}. */
+    public String types;
+    public Integer support;
+    public String surfaceForm;
+    public Integer offset;
+    public Double similarityScore;
+    public Double percentageOfSecondRank;
+
+    /**
+     * Splits {@link #types} on commas and rewrites the known namespace
+     * prefixes of each entry so that the returned names are referenceable.
+     *
+     * @return the rewritten type names, or {@code null} when {@link #types}
+     *         is {@code null}
+     */
+    public HashSet<String> getTypeNames() {
+        if (types == null) {
+            return null;
+        }
+        HashSet<String> typeNames = new HashSet<String>();
+        for (String type : types.split(",")) {
+            // make the returned types referenceable
+            // NOTE(review): the Freebase replacement has no trailing separator,
+            // unlike the Schema one - confirm this is intended.
+            String deref = type.replace("DBpedia:", "dbp-ont:")
+                    .replace("Freebase:", "http://www.freebase.com/schema")
+                    .replace("Schema:", "http://www.schema.org/");
+            typeNames.add(deref);
+        }
+        return typeNames;
+    }
+
+    /**
+     * Returns a human-readable dump of all fields.
+     *
+     * <p>Every value is formatted with {@code %s}: {@code %i} is not a valid
+     * {@link java.util.Formatter} conversion and {@code %d} rejects
+     * {@code Double} arguments, so the previous format string always threw.
+     * {@code %s} also renders unset ({@code null}) fields as "null".
+     */
+    @Override
+    public String toString() {
+        return String.format("[uri=%s, support=%s, types=%s, surfaceForm=\"%s\", offset=%s, similarityScore=%s, percentageOfSecondRank=%s]",
+                uri, support, types, surfaceForm, offset, similarityScore, percentageOfSecondRank);
+    }
+}

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSpotlightAnnotateEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSpotlightAnnotateEnhancementEngine.java?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSpotlightAnnotateEnhancementEngine.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSpotlightAnnotateEnhancementEngine.java Mon Aug 20 12:11:01 2012
@@ -0,0 +1,432 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlightannotate;
+
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_LABEL;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_TYPE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Dictionary;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.Language;
+import org.apache.clerezza.rdf.core.Literal;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.core.serializedform.Serializer;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import org.apache.stanbol.enhancer.servicesapi.helper.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+
+/**
+ * {@link DBPSpotlightAnnotateEnhancementEngine} provides functionality to enhance a document
+ * using the DBpedia Spotlight /annotate REST endpoint
+ * @author Iavor Jelev, Babelmonkeys (GzEvD)
+ */
+@Component(
+    metatype = true, 
+    immediate = true,
+    label = "%stanbol.DBPSpotlightAnnotateEnhancementEngine.name", 
+    description = "%stanbol.DBPSpotlightAnnotateEnhancementEngine.description")
+@Service
+@Properties(value={
+    @Property(name=EnhancementEngine.PROPERTY_NAME,value="dbpspotlightannotate")
+})
+public class DBPSpotlightAnnotateEnhancementEngine 
+        extends AbstractEnhancementEngine<IOException,RuntimeException>
+        implements EnhancementEngine, ServiceProperties {
+
+    /**
+     * Configuration key for the URL of the DBpedia Spotlight annotate
+     * endpoint. If the property is not set, {@link #activate(ComponentContext)}
+     * falls back to <code>http://spotlight.dbpedia.org/rest/annotate</code>.
+     */
+    @Property(value = "http://spotlight.dbpedia.org/rest/annotate")
+    public static final String SL_URL_KEY = "stanbol.DBPSpotlightAnnotateEnhancementEngine.url";
+    
+    /** Configuration key for the spotter; sent as the <code>spotter</code> request parameter. */
+    @Property(value = "NESpotter")
+    public static final String SL_SPOTTER = "stanbol.DBPSpotlightAnnotateEnhancementEngine.spotter";
+
+    /** Configuration key for the disambiguator; sent as the <code>disambiguator</code> request parameter. */
+    @Property(value = "")
+    public static final String SL_DISAMBIGUATOR = "stanbol.DBPSpotlightAnnotateEnhancementEngine.disambiguator";
+
+    /** Configuration key for the types restriction; sent as the <code>types</code> request parameter. */
+    @Property()
+    public static final String SL_RESTRICTION = "stanbol.DBPSpotlightAnnotateEnhancementEngine.types";
+
+    /**
+     * Configuration key for the SPARQL restriction; sent as the
+     * <code>sparql</code> request parameter, but only when no types
+     * restriction is configured.
+     */
+    @Property()
+    public static final String SL_SPARQL = "stanbol.DBPSpotlightAnnotateEnhancementEngine.sparql";
+
+    /** Configuration key for the minimal support; sent as the <code>support</code> request parameter. */
+    @Property()
+    public static final String SL_SUPPORT = "stanbol.DBPSpotlightAnnotateEnhancementEngine.support";
+
+    /** Configuration key for the minimal confidence; sent as the <code>confidence</code> request parameter. */
+    @Property()
+    public static final String SL_CONFIDENCE = "stanbol.DBPSpotlightAnnotateEnhancementEngine.confidence";
+
+
+    /** 
+     * The default execution order of this engine, as reported by
+     * {@link #getServiceProperties()}.
+     */
+    public static final Integer defaultOrder = ORDERING_CONTENT_EXTRACTION - 27;
+
+    /** This contains the only MIME type directly supported by this enhancement engine. */
+    private static final String TEXT_PLAIN_MIMETYPE = "text/plain";
+    /** Set containing the only supported mime type {@link #TEXT_PLAIN_MIMETYPE} */
+    private static final Set<String> SUPPORTED_MIMTYPES = Collections.singleton(TEXT_PLAIN_MIMETYPE);
+
+    /** holds the logger. */
+    private static final Logger log = LoggerFactory.getLogger(DBPSpotlightAnnotateEnhancementEngine.class);
+
+    /** holds the url of the Spotlight REST endpoint */
+    private String spotlightUrl;
+    /** holds the name of the spotter to be used */
+    private String spotlightSpotter;
+    /** holds the name of the disambiguator to be used */
+    private String spotlightDisambiguator;
+    /** holds the type restriction for the results, if the user wishes one */
+    private String spotlightTypesRestriction;
+    /** holds the chosen minimal support value */
+    private String spotlightSupport;
+    /** holds the chosen minimal confidence value */
+    private String spotlightConfidence;
+    /** holds the sparql restriction for the results, if the user wishes one */
+    private String spotlightSparql;
+
+
+    /**
+     * Activates the engine and reads its configuration. A missing endpoint URL
+     * falls back to the public Spotlight annotate endpoint; every other
+     * missing property is left as <code>null</code>.
+     *
+     * @param ce  the {@link ComponentContext} providing the configuration
+     */
+    @SuppressWarnings("unchecked")
+    protected void activate( ComponentContext ce ) throws ConfigurationException, IOException {
+        super.activate(ce);
+
+        Dictionary<String, Object> config = ce.getProperties();
+
+        Object configuredUrl = config.get( SL_URL_KEY );
+        if ( configuredUrl == null ) {
+            spotlightUrl = "http://spotlight.dbpedia.org/rest/annotate";
+        } else {
+            spotlightUrl = (String) configuredUrl;
+        }
+
+        // casting null to String yields null, so unset properties simply stay null
+        spotlightSpotter          = (String) config.get( SL_SPOTTER );
+        spotlightDisambiguator    = (String) config.get( SL_DISAMBIGUATOR );
+        spotlightTypesRestriction = (String) config.get( SL_RESTRICTION );
+        spotlightSparql           = (String) config.get( SL_SPARQL );
+        spotlightSupport          = (String) config.get( SL_SUPPORT );
+        spotlightConfidence       = (String) config.get( SL_CONFIDENCE );
+    }
+    
+    
+
+    /**
+     * Check if the content can be enhanced
+     * @param ci  the {@link ContentItem}
+     */
+     public int canEnhance( ContentItem ci ) throws EngineException {
+        if(ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) != null){
+            return ENHANCE_SYNCHRONOUS; 
+        } else {
+            return CANNOT_ENHANCE;
+        }
+    }
+
+
+     /**
+      * Calculates the enhancements by sending the plain text of the content
+      * item to the DBpedia Spotlight endpoint and adding the returned
+      * annotations to the metadata of the content item.
+      *
+      * @param ci  the {@link ContentItem}
+      * @throws EngineException if the text cannot be read from the content
+      *         item or the request to DBpedia Spotlight fails
+      */
+    public void computeEnhancements( ContentItem ci ) throws EngineException {
+        Entry<UriRef,Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
+        if(contentPart == null){
+            throw new IllegalStateException("No ContentPart with Mimetype '"
+                    + TEXT_PLAIN_MIMETYPE+"' found for ContentItem "+ci.getUri()
+                    + ": This is also checked in the canEnhance method! -> This "
+                    + "indicated an Bug in the implementation of the "
+                    + "EnhancementJobManager!");
+        }
+        final String text;
+        try {
+            text = ContentItemHelper.getText(contentPart.getValue());
+        } catch (IOException e) {
+            throw new InvalidContentException(this, ci, e);
+        }
+
+        Collection<DBPSLAnnotation> dbpslGraph = doPostRequest( text );
+        if ( dbpslGraph == null ) {
+            // nothing was returned, so there is nothing to add
+            return;
+        }
+
+        //Acquire a write lock on the ContentItem when adding the enhancements
+        ci.getLock().writeLock().lock();
+        try {
+            createEnhancements( dbpslGraph, ci);
+            if (log.isDebugEnabled()) {
+                Serializer serializer = Serializer.getInstance();
+                ByteArrayOutputStream debugStream = new ByteArrayOutputStream();
+                serializer.serialize(debugStream, ci.getMetadata(), "application/rdf+xml");
+                try {
+                    log.debug("DBPedia Spotlight Enhancements:\n{}",debugStream.toString("UTF-8"));
+                } catch (UnsupportedEncodingException e) {
+                    // report through the logger instead of printing to stderr
+                    log.warn("Unable to decode the serialized enhancements for debug logging", e);
+                }
+            }
+        } finally {
+            ci.getLock().writeLock().unlock();
+        }
+    }
+
+
+    /**
+     * Writes the enhancement structures for the annotations returned by
+     * DBPedia Spotlight to the metadata of the content item.
+     * Every annotation gets its own TextAnnotation. An EntityAnnotation is
+     * created only once per entity URI; further occurrences of the same
+     * entity are linked to the existing EntityAnnotation.
+     *
+     * @param occs a Collection of entity information
+     * @param ci the content item
+     */
+    public void createEnhancements( Collection<DBPSLAnnotation> occs, ContentItem ci ) {
+        LiteralFactory literals = LiteralFactory.getInstance();
+        // language used for plain literals representing parts of the content
+        String contentLanguage = getMetadataLanguage(ci.getMetadata(), null);
+        final Language language = ( contentLanguage == null || contentLanguage.isEmpty() )
+                ? null
+                : new Language(contentLanguage);
+
+        // entity URI -> EntityAnnotation that was already created for it
+        HashMap<Resource, UriRef> knownEntities = new HashMap<Resource, UriRef>();
+
+        for (DBPSLAnnotation occ : occs) {
+            UriRef textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
+            MGraph graph = ci.getMetadata();
+            graph.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,
+                    new PlainLiteralImpl(occ.surfaceForm, language)));
+            graph.add(new TripleImpl(textAnnotation, ENHANCER_START,
+                    literals.createTypedLiteral(occ.offset)));
+            graph.add(new TripleImpl(textAnnotation, ENHANCER_END,
+                    literals.createTypedLiteral(occ.offset + occ.surfaceForm.length())));
+            // TODO: add the ENHANCER_SELECTION_CONTEXT of the occurrence to the textAnnotation
+
+            if (!knownEntities.containsKey(occ.uri)) {
+                // first occurrence of this entity: create its EntityAnnotation
+                UriRef entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
+                knownEntities.put(occ.uri, entityAnnotation);
+                Literal label = new PlainLiteralImpl( occ.surfaceForm, new Language("en"));
+                graph.add(new TripleImpl(entityAnnotation, DC_RELATION, textAnnotation));
+                graph.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, label));
+
+                HashSet<String> typeNames = occ.getTypeNames();
+                if ( typeNames != null ) {
+                    for ( String typeName : typeNames ) {
+                        UriRef annotationType = new UriRef( typeName );
+                        graph.add( new TripleImpl( entityAnnotation, ENHANCER_ENTITY_TYPE, annotationType ) );
+                        graph.add( new TripleImpl( textAnnotation, DC_TYPE, annotationType ) );
+                    }
+                }
+                graph.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, occ.uri));
+            } else {
+                // entity already known: only link it to the new textAnnotation
+                graph.add(new TripleImpl(knownEntities.get(occ.uri), DC_RELATION, textAnnotation));
+            }
+        }
+    }
+
+
+
+
+    /**
+     * Sends a POST request to the DBpediaSpotlight endpoint.
+     * @param text a <code>String</code> with the text to be analyzed
+     * @return a <code>Collection<DBPSLAnnotation></code> with the server response
+     * @throws EngineException if the request cannot be sent
+     */
+    public Collection<DBPSLAnnotation> doPostRequest( String text ) throws EngineException {
+		StringBuilder data = new StringBuilder();
+		try {
+			if ( spotlightSpotter != null && !spotlightSpotter.isEmpty() )
+    			data.append( URLEncoder.encode( "spotter", "UTF-8" ) + "=" + URLEncoder.encode( spotlightSpotter, "UTF-8" ) + "&" );
+			if ( spotlightDisambiguator != null && !spotlightDisambiguator.isEmpty() )
+    			data.append( URLEncoder.encode( "disambiguator", "UTF-8" ) + "=" + URLEncoder.encode( spotlightDisambiguator, "UTF-8" ) + "&" );
+			if ( spotlightTypesRestriction != null && !spotlightTypesRestriction.isEmpty() )
+                data.append( URLEncoder.encode( "types", "UTF-8" ) + "=" + URLEncoder.encode( spotlightTypesRestriction, "UTF-8" ) + "&" );
+			if ( spotlightSupport != null && !spotlightSupport.isEmpty() )
+        		data.append( URLEncoder.encode( "support", "UTF-8" ) + "=" + URLEncoder.encode( spotlightSupport, "UTF-8" ) + "&" );
+			if ( spotlightConfidence != null && !spotlightConfidence.isEmpty() )
+    			data.append( URLEncoder.encode( "confidence", "UTF-8" ) + "=" + URLEncoder.encode( spotlightConfidence, "UTF-8" ) + "&" );
+			if ( spotlightSparql != null && !spotlightSparql.isEmpty() && spotlightTypesRestriction == null )
+			    data.append( URLEncoder.encode( "sparql", "UTF-8" ) + "=" + URLEncoder.encode( spotlightSparql, "UTF-8" ) + "&" );
+			data.append( URLEncoder.encode( "text", "UTF-8" ) + "=" + URLEncoder.encode( text, "UTF-8" ) );
+		} catch (UnsupportedEncodingException e) {
+			throw new EngineException( "Data for the httprequest could not be converted. Error: " + e.getMessage() );
+		}
+
+		HttpURLConnection connection = null;  
+		StringBuffer response        = new StringBuffer(); 
+		
+        try {			
+			//Create connection
+        	URL url = new URL( spotlightUrl );
+			connection = ( HttpURLConnection )url.openConnection();
+			connection.setRequestMethod( "POST" );
+			connection.setRequestProperty( "Content-Type", "application/x-www-form-urlencoded" );
+			connection.setRequestProperty( "Accept", "text/xml" );
+
+			connection.setUseCaches( false );
+			connection.setDoInput( true );
+			connection.setDoOutput( true );
+
+			//Send request
+			DataOutputStream wr = new DataOutputStream (
+					connection.getOutputStream ());
+			wr.writeBytes( data.toString() );
+			wr.flush ();
+			wr.close ();
+
+			//Get Response	
+			InputStream is = connection.getInputStream();
+			BufferedReader rd = new BufferedReader( new InputStreamReader( is ) );
+			String line;
+			while((line = rd.readLine()) != null) {
+				response.append( line );
+				response.append( '\r' );
+			}
+			rd.close();
+
+		} catch (Exception e) {
+
+			e.printStackTrace();
+			return null;
+
+		} finally {
+
+			if(connection != null) {
+				connection.disconnect(); 
+			}
+		}
+
+		
+        // Parse the response
+        XMLParser xmlParser = new XMLParser();
+		try {
+			Document xmlDoc                   = xmlParser.loadXMLFromString( response.toString() );
+			NodeList nlist                    = xmlParser.getElementsByTagName( xmlDoc, "Resource" );
+			Collection<DBPSLAnnotation> annos = this.getAnnotations( nlist );
+        
+            return annos;
+   		} catch ( Exception e) {
+			throw new EngineException( "Response XML could not be parsed. Error: " + e.getMessage() );
+		}
+    }
+ 
+ 
+    /**
+     * Converts the <code>Resource</code> elements of the DBpedia Spotlight
+     * response into annotations, which <code>createEnhancements</code> adds
+     * to the meta data of the content item.
+     * @param nList NodeList of all Resources contained in the XML response from DBpedia Spotlight
+     * @return a Collection<DBPSLAnnotation> with all annotations
+     */
+	private Collection<DBPSLAnnotation> getAnnotations( NodeList nList ) {
+		Collection<DBPSLAnnotation> annotations = new HashSet<DBPSLAnnotation>();
+
+		for (int i = 0; i < nList.getLength(); i++) {
+			DBPSLAnnotation annotation = new DBPSLAnnotation();
+			Element resource           = (Element) nList.item(i);
+
+			annotation.uri                    = new UriRef( resource.getAttribute( "URI" ) );
+			annotation.support                = Integer.valueOf( resource.getAttribute( "support" ) );
+			annotation.types                  = resource.getAttribute( "types" );
+			annotation.surfaceForm            = resource.getAttribute( "surfaceForm" );
+			annotation.offset                 = Integer.valueOf( resource.getAttribute( "offset" ) );
+			annotation.similarityScore        = Double.valueOf( resource.getAttribute( "similarityScore" ) );
+			annotation.percentageOfSecondRank = Double.valueOf( resource.getAttribute( "percentageOfSecondRank" ) );
+
+			annotations.add( annotation );
+		}
+
+		return annotations;
+	}
+
+
+    public Map<String, Object> getServiceProperties() {
+        return Collections.unmodifiableMap(Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder));
+    }
+
+
+    /**
+     * Looks up the first <code>DC_LANGUAGE</code> value of the parsed subject.
+     *
+     * @param model the graph to search
+     * @param subj the subject to filter by; passed unchanged to <code>model.filter</code>
+     * @return the lexical form of the first language value, or
+     *         <code>null</code> if the graph contains none
+     */
+    public String getMetadataLanguage(MGraph model, NonLiteral subj) {
+        Iterator<Triple> languageTriples = model.filter(subj, DC_LANGUAGE, null);
+        if (!languageTriples.hasNext()) {
+            return null;
+        }
+        return getLexicalForm(languageTriples.next().getObject());
+    }
+
+    public String getLexicalForm(Resource res) {
+        if (res == null) {
+            return null;
+        } else if (res instanceof Literal) {
+            return ((Literal) res).getLexicalForm();
+        } else {
+            return res.toString();
+        }
+    }
+
+    
+    /**
+     * Overrides the URL of the Spotlight endpoint. Used by the test class,
+     * which does not activate the component.
+     * @param url String the url of the Spotlight endpoint
+     */
+    public void setEndpointUrl( String url ) {
+        this.spotlightUrl = url;
+    }
+
+}

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/XMLParser.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/XMLParser.java?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/XMLParser.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/XMLParser.java Mon Aug 20 12:11:01 2012
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlightannotate;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
+
+
+/**
+ * Parses the XML results given by DBPedia Spotlight.
+ *
+ * @author <a href="mailto:iavor.jelev@babelmonkeys.com">Iavor Jelev</a>
+ */
+
+public class XMLParser {
+ 
+	/**
+	 * Returns all elements of the document with the parsed tag name.
+	 *
+	 * @param doc the document to search
+	 * @param tagName the name of the elements to select
+	 * @return the matching elements as returned by {@link Document#getElementsByTagName(String)}
+	 */
+	public NodeList getElementsByTagName( Document doc, String tagName ) {
+		return doc.getElementsByTagName( tagName );
+	}
+	
+
+	/**
+	 * Parses an XML document held in a String.
+	 *
+	 * @param xml the XML document
+	 * @return the parsed and normalized document
+	 * @throws SAXException if the XML cannot be parsed
+	 * @throws IOException if reading the data fails
+	 */
+	public Document loadXMLFromString( String xml ) throws SAXException, IOException {
+		// Encode with a fixed charset instead of the platform default, which
+		// may not be able to represent all characters of the document.
+		// NOTE(review): assumes the document does not declare an encoding other than UTF-8.
+		return loadXMLFromInputStream( new ByteArrayInputStream( xml.getBytes( "UTF-8" ) ) );
+	}
+
+	
+	/**
+	 * Parses an XML document from a stream with a namespace aware parser.
+	 * The stream is closed in any case, also when parsing fails.
+	 *
+	 * @param is the stream to read the document from
+	 * @return the parsed and normalized document
+	 * @throws SAXException if the XML cannot be parsed or no parser can be created
+	 * @throws IOException if reading or closing the stream fails
+	 */
+	public Document loadXMLFromInputStream( InputStream is ) throws SAXException, IOException {
+		// NOTE(review): the parser uses the JAXP defaults; consider disabling
+		// DTDs/external entities if the endpoint is not trusted.
+		try {
+			DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+			factory.setNamespaceAware( true );
+			DocumentBuilder builder;
+			try {
+				builder = factory.newDocumentBuilder();
+			}
+			catch ( ParserConfigurationException ex ) {
+				// fail with the real cause instead of a NullPointerException on a null builder
+				throw new SAXException( "Unable to create the XML parser", ex );
+			}
+			Document doc = builder.parse( is );
+			doc.getDocumentElement().normalize();
+
+			return doc;
+		} finally {
+			if ( is != null ) {
+				is.close();
+			}
+		}
+	}
+
+
+	/**
+	 * Parses an XML document from a file.
+	 *
+	 * @param filePath the path of the file to parse
+	 * @return the parsed and normalized document
+	 * @throws ParserConfigurationException if no parser can be created
+	 * @throws SAXException if the XML cannot be parsed
+	 * @throws IOException if the file cannot be read
+	 */
+	public Document loadXMLFromFile( String filePath ) throws ParserConfigurationException, SAXException, IOException {
+		File fXmlFile = new File( filePath );
+		DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
+		DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
+		Document doc = dBuilder.parse(fXmlFile);
+		doc.getDocumentElement().normalize();
+		
+		return doc;
+	}
+}
\ No newline at end of file

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/resources/OSGI-INF/metatype/metatype.properties (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/resources/OSGI-INF/metatype/metatype.properties Mon Aug 20 12:11:01 2012
@@ -0,0 +1,42 @@
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing,
+#  software distributed under the License is distributed on an
+#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#  KIND, either express or implied.  See the License for the
+#  specific language governing permissions and limitations
+#  under the License.
+
+
+
+# This file contains localization strings for configuration labels and
+# descriptions as used in the metatype.xml descriptor generated by the
+# maven SCR plugin
+
+stanbol.DBPSpotlightAnnotateEnhancementEngine.name = DBpedia Spotlight Annotate: Named Entity Extraction and Ontology Linking
+stanbol.DBPSpotlightAnnotateEnhancementEngine.description = Find names of people, organizations, \
+ places... disambiguate and link them to DBpedia Ontology URIs. This is a complete EnhancementChain, all in one Engine.
+stanbol.DBPSpotlightAnnotateEnhancementEngine.url.name = Spotlight URL
+stanbol.DBPSpotlightAnnotateEnhancementEngine.url.description = The URL which will be used for the request
+stanbol.DBPSpotlightAnnotateEnhancementEngine.spotter.name = Spotter
+stanbol.DBPSpotlightAnnotateEnhancementEngine.spotter.description = The algorithm which will be used for Spotting \
+    (aka Term Recognition). Currently available: NESpotter, LingPipeSpotter, OpenNLPChunkerSpotter, Kea
+stanbol.DBPSpotlightAnnotateEnhancementEngine.disambiguator.name = Disambiguator
+stanbol.DBPSpotlightAnnotateEnhancementEngine.disambiguator.description = The algorithm used for ranking of senses \
+     based on context. Currently available: Document, Occurrences
+stanbol.DBPSpotlightAnnotateEnhancementEngine.types.name = Types Restriction
+stanbol.DBPSpotlightAnnotateEnhancementEngine.types.description = The DBpedia Ontology types you wish to restrict your results to
+stanbol.DBPSpotlightAnnotateEnhancementEngine.sparql.name = Sparql
+stanbol.DBPSpotlightAnnotateEnhancementEngine.sparql.description = Restrict the result with SPARQL
+stanbol.DBPSpotlightAnnotateEnhancementEngine.support.name = Support
+stanbol.DBPSpotlightAnnotateEnhancementEngine.support.description = Filter the results based on a support metric
+stanbol.DBPSpotlightAnnotateEnhancementEngine.confidence.name = Confidence
+stanbol.DBPSpotlightAnnotateEnhancementEngine.confidence.description = Filter the results based on a confidence metric

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/core/DBPSpotlightAnnotateEnhancementTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/core/DBPSpotlightAnnotateEnhancementTest.java?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/core/DBPSpotlightAnnotateEnhancementTest.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/core/DBPSpotlightAnnotateEnhancementTest.java Mon Aug 20 12:11:01 2012
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlightannotate.core;
+
+import java.util.Collection;
+
+import org.apache.stanbol.enhancer.engines.dbpspotlightannotate.DBPSLAnnotation;
+import org.apache.stanbol.enhancer.engines.dbpspotlightannotate.DBPSpotlightAnnotateEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.osgi.service.cm.ConfigurationException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This class provides a JUnit test for DBpedia Spotlight Annotate EnhancementEngine.
+ * The test sends a request to the Spotlight endpoint configured by the system
+ * property {@link DBPSpotlightAnnotateEnhancementEngine#SL_URL_KEY} (or to the
+ * public endpoint if the property is not set).
+ * @author Iavor Jelev, babelmonkeys / GzEvD
+ */
+public class DBPSpotlightAnnotateEnhancementTest {
+
+	  /**
+	   * This contains the logger.
+	   */
+	  private static final Logger LOG  = LoggerFactory.getLogger(DBPSpotlightAnnotateEnhancementTest.class);
+	  /** The endpoint to test against; the system property overrides the public endpoint. */
+	  private static final String SPL_URL   = System.getProperty(
+			  DBPSpotlightAnnotateEnhancementEngine.SL_URL_KEY, "http://spotlight.dbpedia.org/rest/annotate");
+	  private static final String TEST_TEXT = "President Obama is meeting Angela Merkel in Berlin on Monday";
+	  private static DBPSpotlightAnnotateEnhancementEngine dbpslight;
+	  
+	  @BeforeClass
+	  public static void oneTimeSetup() throws ConfigurationException {
+		  dbpslight = new DBPSpotlightAnnotateEnhancementEngine();
+		  dbpslight.setEndpointUrl( SPL_URL );
+	  }
+
+	  
+	  /**
+	   * Sends the test text to the endpoint and expects at least one entity.
+	   * An {@link EngineException} is not caught, so JUnit reports it together
+	   * with its stack trace.
+	   */
+	  @Test
+	  public void testEntityExtraction() throws EngineException {
+		Collection<DBPSLAnnotation> entities = dbpslight.doPostRequest( TEST_TEXT );
+		// guard against a missing result before dereferencing it
+		Assert.assertNotNull("The request to " + SPL_URL + " returned no result!", entities);
+		LOG.info("Found entities: {}",entities.size());
+		LOG.debug("Entities:\n{}",entities);
+		Assert.assertFalse("No entities were found!", entities.isEmpty());
+	  }
+
+}

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/resources/README
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/resources/README?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/resources/README (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/resources/README Mon Aug 20 12:11:01 2012
@@ -0,0 +1,15 @@
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+

Propchange: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Aug 20 12:11:01 2012
@@ -0,0 +1,7 @@
+target
+
+.settings
+
+.classpath
+
+.project

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/README.md
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/README.md?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/README.md (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/README.md Mon Aug 20 12:11:01 2012
@@ -0,0 +1,96 @@
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+# LangId: Language Identification Enhancement Engine
+
+The **LangId** engine determines the language of text. 
+
+## Technical Description
+
+The provided engine is based on the language identifier of [Apache Tika](http://tika.apache.org/).
+The text to be checked must be provided in plain text format in one of two forms:
+
+* a plain text content item
+* by the content item's metadata as the string value of the property 
+    
+    <pre><code>http://www.semanticdesktop.org/ontologies/2007/01/19/nie#plainTextContent</code></pre>
+
+The result of language identification is added as a TextAnnotation to the content item's metadata, as the string value of the property
+
+    http://purl.org/dc/terms/language
+
+This RDF snippet illustrates the output:
+
+    <fise:TextAnnotation rdf:about="urn:enhancement-a147957b-41f9-58f7-bbf1-b880b3aa4b49">
+        <dc:language>en</dc:language>
+        <dc:creator>org.apache.stanbol.enhancer.engines.langid.LangIdEnhancementEngine</dc:creator>
+    </fise:TextAnnotation>
+
+
+By default the language identifier distinguishes the languages listed below. The value after each colon is the language label used in the metadata.
+
+* German: de
+* English: en
+* Estonian: et
+* French: fr
+* Spanish: es
+* Italian: it
+* Swedish: sv
+* Polish: pl
+* Dutch: nl
+* Norwegian: no
+* Finnish: fi
+* Greek: el
+* Danish: da
+* Hungarian: hu
+* Icelandic: is
+* Lithuanian: lt
+* Portuguese: pt
+* Russian: ru
+* Thai: th
+
+Additional language models can be created as Tika `LanguageProfile` instances (`org.apache.tika.language.LanguageProfile`).
+
+## Configuration options
+
+* <pre><code>org.apache.stanbol.enhancer.engines.langid.probe-length</code></pre>
+
+    an integer specifying how many characters will be used for
+    identification. A value of 0 or below means to use the complete
+    text. Otherwise only a substring of the specified length taken from the
+    middle of the text will be used. The default value is 400 characters.
+
+## Usage
+
+Assuming that the Stanbol endpoint with the full launcher is running at
+
+    http://localhost:8080
+
+and the engine is activated, commands like the following can be used from the
+command line to submit a text file as a content item:
+
+* stateless interface
+
+    curl -i -X POST -H "Content-Type:text/plain" -T testfile.txt http://localhost:8080/engines
+
+* stateful interface
+
+    curl -i -X PUT -H "Content-Type:text/plain" -T testfile.txt http://localhost:8080/contenthub/content/someFileId
+
+Alternatively, the Stanbol web interface can be used for submitting documents
+and viewing the metadata at
+
+    http://localhost:8080/contenthub
+

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/pom.xml?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/pom.xml (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/pom.xml Mon Aug 20 12:11:01 2012
@@ -0,0 +1,121 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <artifactId>org.apache.stanbol.enhancer.parent</artifactId>
+    <groupId>org.apache.stanbol</groupId>
+    <version>0.9.0-incubating</version>
+    <relativePath>../../parent</relativePath>
+  </parent>
+
+  <groupId>org.apache.stanbol</groupId>
+  <artifactId>org.apache.stanbol.enhancer.engines.dbpspotlightcandidates</artifactId>
+  <packaging>bundle</packaging>
+
+  <name>Apache Stanbol Enhancer Enhancement Engine : DBPedia Spotlight Candidates</name>
+  <description>an enhancement engine for associating candidate DBpedia URIs to spotted surfaceForms</description>
+
+  <inceptionYear>2010</inceptionYear>
+
+  <!--scm>
+    <connection>
+      scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
+    </connection>
+    <developerConnection>
+      scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
+    </developerConnection>
+    <url>http://incubator.apache.org/stanbol/</url>
+  </scm-->
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-bundle-plugin</artifactId>
+        <extensions>true</extensions>
+        <configuration>
+          <instructions>
+            <Export-Package>
+              org.apache.stanbol.enhancer.engines.dbpspotlightcandidates;version=${project.version}
+            </Export-Package>
+            <Embed-Dependency>
+            </Embed-Dependency>
+          </instructions>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-scr-plugin</artifactId>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.rat</groupId>
+        <artifactId>apache-rat-plugin</artifactId>
+        <configuration>
+          <excludes>
+            <!-- AL20 licensed files: See src/test/resources/README -->
+            <exclude>src/test/resources/en.txt</exclude>
+          </excludes>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+    </dependency>
+
+    <!-- NOTE(review): the commented-out scm section of this POM still points at
+         the langid engine, so this tika-core dependency may be a leftover from
+         that module; confirm the candidates engine actually needs it. -->
+    <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-core</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.felix</groupId>
+      <artifactId>org.apache.felix.scr.annotations</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.clerezza</groupId>
+      <artifactId>rdf.core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+</project>

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/license/THIRD-PARTY.properties?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/license/THIRD-PARTY.properties (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/license/THIRD-PARTY.properties Mon Aug 20 12:11:01 2012
@@ -0,0 +1,17 @@
+# Generated by org.codehaus.mojo.license.AddThirdPartyMojo
+#-------------------------------------------------------------------------------
+# Already used licenses in project :
+# - Apache License
+# - Common Development and Distribution License (CDDL) v1.0
+# - Common Public License Version 1.0
+# - ICU License
+# - MIT License
+# - The Apache Software License, Version 2.0
+#-------------------------------------------------------------------------------
+# Please fill the missing licenses for dependencies :
+#
+#
+#Wed Feb 15 19:06:13 CET 2012
+javax.servlet--servlet-api--2.4=Common Development And Distribution License (CDDL), Version 1.0
+org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
+org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/CandidateResource.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/CandidateResource.java?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/CandidateResource.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/CandidateResource.java Mon Aug 20 12:11:01 2012
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlightcandidates;
+
+//import org.apache.clerezza.rdf.core.Resource;
+
+/**
+ * Stores the candidate resources given by DBPedia Spotlight Candidates.
+ *
+ * @author <a href="mailto:iavor.jelev@babelmonkeys.com">Iavor Jelev</a>
+ */
+public class CandidateResource {
+
+	/** Label of the candidate. */
+	public String label;
+	/** URI of the candidate. */
+	public String uri;
+	// The score fields below are plain value holders; this class does not
+	// compute or validate them.
+	public double contextualScore;
+	public double percentageOfSecondRank;
+	public double support;
+	public double priorScore;
+	public double finalScore;
+
+	/**
+	 * Renders all fields as <code>[name=value, ...]</code> in declaration order.
+	 *
+	 * @return a single-line description of this candidate
+	 */
+	@Override
+	public String toString() {
+		// %s rather than %d: the scores are doubles, and %d rejects them with an
+		// IllegalFormatConversionException. %s also keeps the output
+		// locale-independent, since it uses Double.toString.
+		return String.format( "[label=%s, uri=%s, contextualScore=%s, percentageOfSecondRank=%s, support=%s, " +
+				"priorScore=%s, finalScore=%s]", label, uri, contextualScore, percentageOfSecondRank, support, priorScore, finalScore ) ;
+	}
+}

Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/DBPSLSurfaceForm.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/DBPSLSurfaceForm.java?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/DBPSLSurfaceForm.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/DBPSLSurfaceForm.java Mon Aug 20 12:11:01 2012
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlightcandidates;
+
+import java.util.ArrayList;
+import java.util.List;
+
+//import org.apache.clerezza.rdf.core.Resource;
+
+/**
+ * Stores the surface forms given by DBPedia Spotlight Candidates.
+ *
+ * @author <a href="mailto:iavor.jelev@babelmonkeys.com">Iavor Jelev</a>
+ */
+public class DBPSLSurfaceForm {
+
+	/** Name of the surface form. */
+	public String name;
+	/** Type of the surface form. */
+	public String type;
+	/** Offset of the surface form; boxed, so it may be <code>null</code> when unset. */
+	public Integer offset;
+	/** Candidate resources attached to this surface form; starts out empty, never <code>null</code>. */
+	public List<CandidateResource> resources = new ArrayList<CandidateResource>();
+
+	/**
+	 * Renders name, offset and type as <code>[name=..., offset=..., type=...]</code>.
+	 * The candidate list is not included.
+	 *
+	 * @return a single-line description of this surface form
+	 */
+	@Override
+	public String toString() {
+		// %d, not %i: Java's Formatter has no 'i' conversion and throws
+		// UnknownFormatConversionException. %d prints "null" for a null offset.
+		return String.format( "[name=%s, offset=%d, type=%s]", name, offset, type ) ;
+	}
+}