You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/08/20 14:11:05 UTC
svn commit: r1374984 [1/3] - in
/incubator/stanbol/branches/dbpedia-spotlight-engines/engines:
dbpspotlightannotate/ dbpspotlightannotate/src/
dbpspotlightannotate/src/license/ dbpspotlightannotate/src/main/
dbpspotlightannotate/src/main/java/ dbpspotl...
Author: rwesten
Date: Mon Aug 20 12:11:01 2012
New Revision: 1374984
URL: http://svn.apache.org/viewvc?rev=1374984&view=rev
Log:
STANBOL-704: Applied the provided patch with a minor correction of a package name of an unit test for the disambiguation engine
Added:
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/ (with props)
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/README.md
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/pom.xml
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/license/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/license/THIRD-PARTY.properties
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSLAnnotation.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSpotlightAnnotateEnhancementEngine.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/XMLParser.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/resources/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/resources/OSGI-INF/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/resources/OSGI-INF/metatype/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/resources/OSGI-INF/metatype/metatype.properties
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/enhancer/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/enhancer/engines/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/core/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/core/DBPSpotlightAnnotateEnhancementTest.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/resources/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/resources/README
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/ (with props)
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/README.md
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/pom.xml
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/license/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/license/THIRD-PARTY.properties
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/CandidateResource.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/DBPSLSurfaceForm.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/DBPSpotlightCandidatesEnhancementEngine.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/XMLParser.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/resources/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/resources/OSGI-INF/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/resources/OSGI-INF/metatype/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/resources/OSGI-INF/metatype/metatype.properties
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/java/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/java/org/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/java/org/apache/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/java/org/apache/stanbol/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/java/org/apache/stanbol/enhancer/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/java/org/apache/stanbol/enhancer/engines/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/core/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/core/DBPSpotlightCandidatesEnhancementTest.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/resources/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/resources/README
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/ (with props)
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/README.md
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/pom.xml
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/license/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/license/THIRD-PARTY.properties
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/java/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/java/org/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/java/org/apache/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/java/org/apache/stanbol/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/java/org/apache/stanbol/enhancer/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/java/org/apache/stanbol/enhancer/engines/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightdisambiguate/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightdisambiguate/DBPSLAnnotation.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightdisambiguate/DBPSpotlightDisambiguateEnhancementEngine.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightdisambiguate/XMLParser.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/resources/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/resources/OSGI-INF/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/resources/OSGI-INF/metatype/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/resources/OSGI-INF/metatype/metatype.properties
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/java/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/java/org/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/java/org/apache/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/java/org/apache/stanbol/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/java/org/apache/stanbol/enhancer/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/java/org/apache/stanbol/enhancer/engines/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightdisambiguate/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightdisambiguate/core/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightdisambiguate/core/DBPSpotlightDisambiguateEnhancementTest.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/resources/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/resources/README
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/resources/spots.xml
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/ (with props)
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/README.md
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/pom.xml
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/license/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/license/THIRD-PARTY.properties
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/DBPSLSurfaceForm.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/DBPSpotlightSpotEnhancementEngine.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/XMLParser.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/resources/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/resources/OSGI-INF/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/resources/OSGI-INF/metatype/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/resources/OSGI-INF/metatype/metatype.properties
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/apache/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/apache/stanbol/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/apache/stanbol/enhancer/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/apache/stanbol/enhancer/engines/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/core/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/core/DBPSpotlightSpotEnhancementTest.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/resources/
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/resources/README
Propchange: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Aug 20 12:11:01 2012
@@ -0,0 +1,7 @@
+.classpath
+
+.settings
+
+target
+
+.project
Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/README.md
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/README.md?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/README.md (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/README.md Mon Aug 20 12:11:01 2012
@@ -0,0 +1,96 @@
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+# LangId: Language Identification Enhancement Engine
+
+The **LangId** engine determines the language of text.
+
+## Technical Description
+
+The provided engine is based on the language identifier of [Apache Tika](http://tika.apache.org/).
+The text to be checked must be provided in plain text format in one of two forms:
+
+* a plain text content item
+* by the content item's metadata as the string value of the property
+
+ <pre><code>http://www.semanticdesktop.org/ontologies/2007/01/19/nie#plainTextContent</code></pre>
+
+The result of language identification is added as TextAnnotation to the content item's metadata as string value of the property
+
+ http://purl.org/dc/terms/language
+
+This RDF snippet illustrates the output:
+
+ <fise:TextAnnotation rdf:about="urn:enhancement-a147957b-41f9-58f7-bbf1-b880b3aa4b49">
+ <dc:language>en</dc:language>
+ <dc:creator>org.apache.stanbol.enhancer.engines.langid.LangIdEnhancementEngine</dc:creator>
+ </fise:TextAnnotation>
+
+
+By default the language identifier distinguishes the languages listed below. After the colon the value of the language label in the metadata is given.
+
+* German: de
+* English: en
+* Estonian: et
+* French: fr
+* Spanish: es
+* Italian: it
+* Swedish: sv
+* Polish: pl
+* Dutch: nl
+* Norwegian: no
+* Finnish: fi
+* Greek: el
+* Danish: da
+* Hungarian: hu
+* Icelandic: is
+* Lithuanian: lt
+* Portuguese: pt
+* Russian: ru
+* Thai: th
+
+Additional language models can be created as Tika [LanguageProfile](org.apache.tika.language.LanguageProfile).
+
+## Configuration options
+
+* <pre><code>org.apache.stanbol.enhancer.engines.langid.probe-length</code></pre>
+
+ an integer specifying how many characters will be used for
+ identification. A value of 0 or below means to use the complete
+ text. Otherwise only a substring of the specified length taken from the
+ middle of the text will be used. The default value is 400 characters.
+
+## Usage
+
+Assuming that the Stanbol endpoint with the full launcher is running at
+
+ http://localhost:8080
+
+and the engine is activated, from the command line commands like this
+can be used for submitting some text file as content item:
+
+* stateless interface
+
+ curl -i -X POST -H "Content-Type:text/plain" -T testfile.txt http://localhost:8080/engines
+
+* stateful interface
+
+ curl -i -X PUT -H "Content-Type:text/plain" -T testfile.txt http://localhost:8080/contenthub/content/someFileId
+
+Alternatively, the Stanbol web interface can be used for submitting documents
+and viewing the metadata at
+
+ http://localhost:8080/contenthub
+
Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/pom.xml?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/pom.xml (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/pom.xml Mon Aug 20 12:11:01 2012
@@ -0,0 +1,122 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <artifactId>org.apache.stanbol.enhancer.parent</artifactId>
+ <groupId>org.apache.stanbol</groupId>
+ <version>0.9.0-incubating</version>
+ <relativePath>../../parent</relativePath>
+ </parent>
+
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.engines.dbpspotlightannotate</artifactId>
+ <packaging>bundle</packaging>
+
+ <name>Apache Stanbol Enhancer Enhancement Engine : DBPedia Spotlight Annotate</name>
+ <description>just tests the Stanbol Engine Import
+ </description>
+
+ <inceptionYear>2010</inceptionYear>
+
+ <!--scm>
+ <connection>
+ scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
+ </connection>
+ <developerConnection>
+ scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
+ </developerConnection>
+ <url>http://incubator.apache.org/stanbol/</url>
+ </scm-->
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-bundle-plugin</artifactId>
+ <extensions>true</extensions>
+ <configuration>
+ <instructions>
+ <Export-Package>
+ org.apache.stanbol.enhancer.engines.dbpspotlightannotate;version=${project.version}
+ </Export-Package>
+ <Embed-Dependency>
+ </Embed-Dependency>
+ </instructions>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-scr-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ <configuration>
+ <excludes>
+ <!-- AL20 licensed files: See src/test/resources/README -->
+ <exclude>src/test/resources/en.txt</exclude>
+ </excludes>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <properties>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-core</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>org.apache.felix.scr.annotations</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>rdf.core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+</project>
Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/license/THIRD-PARTY.properties?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/license/THIRD-PARTY.properties (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/license/THIRD-PARTY.properties Mon Aug 20 12:11:01 2012
@@ -0,0 +1,17 @@
+# Generated by org.codehaus.mojo.license.AddThirdPartyMojo
+#-------------------------------------------------------------------------------
+# Already used licenses in project :
+# - Apache License
+# - Common Development and Distribution License (CDDL) v1.0
+# - Common Public License Version 1.0
+# - ICU License
+# - MIT License
+# - The Apache Software License, Version 2.0
+#-------------------------------------------------------------------------------
+# Please fill the missing licenses for dependencies :
+#
+#
+#Wed Feb 15 19:06:13 CET 2012
+javax.servlet--servlet-api--2.4=Common Development And Distribution License (CDDL), Version 1.0
+org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
+org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0
Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSLAnnotation.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSLAnnotation.java?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSLAnnotation.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSLAnnotation.java Mon Aug 20 12:11:01 2012
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlightannotate;
+
+import java.util.HashSet;
+
+import org.apache.clerezza.rdf.core.Resource;
+
+/**
+ * Holds a single annotation result returned by DBpedia Spotlight.
+ *
+ * <p>All values are exposed as public fields. Every field is an object
+ * reference and therefore may be {@code null}.
+ *
+ * @author <a href="mailto:iavor.jelev@babelmonkeys.com">Iavor Jelev</a>
+ */
+public class DBPSLAnnotation {
+
+    public Resource uri;
+    /** Comma-separated list of prefixed type names, e.g. {@code DBpedia:...,Schema:...}. */
+    public String types;
+    public Integer support;
+    public String surfaceForm;
+    public Integer offset;
+    public Double similarityScore;
+    public Double percentageOfSecondRank;
+
+    /**
+     * Splits the comma-separated {@link #types} value and rewrites the known
+     * prefixes so that the returned names are referenceable:
+     * {@code DBpedia:} becomes {@code dbp-ont:}, {@code Freebase:} becomes
+     * {@code http://www.freebase.com/schema} and {@code Schema:} becomes
+     * {@code http://www.schema.org/}.
+     *
+     * @return the rewritten type names, with surrounding whitespace removed
+     *         and empty entries skipped, or {@code null} if {@link #types}
+     *         is {@code null}
+     */
+    public HashSet<String> getTypeNames() {
+        if (types == null) {
+            // no types value set: report null rather than an empty set
+            return null;
+        }
+        HashSet<String> typeNames = new HashSet<String>();
+        for (String type : types.split(",")) {
+            String name = type.trim();
+            if (name.isEmpty()) {
+                // an empty types value or a stray comma must not yield an empty type name
+                continue;
+            }
+            // make the returned types referenceable
+            typeNames.add(name.replace("DBpedia:", "dbp-ont:")
+                    .replace("Freebase:", "http://www.freebase.com/schema")
+                    .replace("Schema:", "http://www.schema.org/"));
+        }
+        return typeNames;
+    }
+
+    /**
+     * Returns a single-line description listing every field of this
+     * annotation. Fields that are {@code null} are printed as {@code null}.
+     *
+     * @return the formatted field listing
+     */
+    @Override
+    public String toString() {
+        // %d is the integer conversion in java.util.Formatter (%i does not exist and
+        // throws UnknownFormatConversionException); the Double fields use %s because
+        // %d rejects floating-point arguments with IllegalFormatConversionException.
+        return String.format("[uri=%s, support=%d, types=%s, surfaceForm=\"%s\", offset=%d, similarityScore=%s, percentageOfSecondRank=%s]",
+                uri, support, types, surfaceForm, offset, similarityScore, percentageOfSecondRank);
+    }
+}
Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSpotlightAnnotateEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSpotlightAnnotateEnhancementEngine.java?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSpotlightAnnotateEnhancementEngine.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSpotlightAnnotateEnhancementEngine.java Mon Aug 20 12:11:01 2012
@@ -0,0 +1,432 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlightannotate;
+
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_LABEL;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_TYPE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Dictionary;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.Language;
+import org.apache.clerezza.rdf.core.Literal;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.clerezza.rdf.core.serializedform.Serializer;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import org.apache.stanbol.enhancer.servicesapi.helper.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+
+/**
+ * {@link DBPSpotlightAnnotateEnhancementEngine} provides functionality to enhance a document
+ * using the DBpedia Spotlight /annotate REST endpoint
+ * @author Iavor Jelev, Babelmonkeys (GzEvD)
+ */
+@Component(
+ metatype = true,
+ immediate = true,
+ label = "%stanbol.DBPSpotlightAnnotateEnhancementEngine.name",
+ description = "%stanbol.DBPSpotlightAnnotateEnhancementEngine.description")
+@Service
+@Properties(value={
+ @Property(name=EnhancementEngine.PROPERTY_NAME,value="dbpspotlightannotate")
+})
+public class DBPSpotlightAnnotateEnhancementEngine
+ extends AbstractEnhancementEngine<IOException,RuntimeException>
+ implements EnhancementEngine, ServiceProperties {
+
+ /**
+ * Configuration key for the URL of the DBpedia Spotlight REST endpoint that is used for the request
+ */
+ @Property(value = "http://spotlight.dbpedia.org/rest/annotate")
+ public static final String SL_URL_KEY = "stanbol.DBPSpotlightAnnotateEnhancementEngine.url";
+
+ @Property(value = "NESpotter")
+ public static final String SL_SPOTTER = "stanbol.DBPSpotlightAnnotateEnhancementEngine.spotter";
+
+ @Property(value = "")
+ public static final String SL_DISAMBIGUATOR = "stanbol.DBPSpotlightAnnotateEnhancementEngine.disambiguator";
+
+ @Property()
+ public static final String SL_RESTRICTION = "stanbol.DBPSpotlightAnnotateEnhancementEngine.types";
+
+ @Property()
+ public static final String SL_SPARQL = "stanbol.DBPSpotlightAnnotateEnhancementEngine.sparql";
+
+ @Property()
+ public static final String SL_SUPPORT = "stanbol.DBPSpotlightAnnotateEnhancementEngine.support";
+
+ @Property()
+ public static final String SL_CONFIDENCE = "stanbol.DBPSpotlightAnnotateEnhancementEngine.confidence";
+
+
+ /**
+ * The default value for the Execution of this Engine.
+ */
+ public static final Integer defaultOrder = ORDERING_CONTENT_EXTRACTION - 27;
+
+ /** This contains the only MIME type directly supported by this enhancement engine. */
+ private static final String TEXT_PLAIN_MIMETYPE = "text/plain";
+ /** Set containing the only supported mime type {@link #TEXT_PLAIN_MIMETYPE} */
+ private static final Set<String> SUPPORTED_MIMTYPES = Collections.singleton(TEXT_PLAIN_MIMETYPE);
+
+ /** holds the logger. */
+ private static final Logger log = LoggerFactory.getLogger(DBPSpotlightAnnotateEnhancementEngine.class);
+
+ /** holds the url of the Spotlight REST endpoint */
+ private String spotlightUrl;
+ /** holds the name of the spotter to be used */
+ private String spotlightSpotter;
+ /** holds the name of the disambiguator to be used */
+ private String spotlightDisambiguator;
+ /** holds the type restriction for the results, if the user wishes one */
+ private String spotlightTypesRestriction;
+ /** holds the chosen minimal support value */
+ private String spotlightSupport;
+ /** holds the chosen minimal confidence value */
+ private String spotlightConfidence;
+ /** holds the sparql restriction for the results, if the user wishes one */
+ private String spotlightSparql;
+
+
+ /**
+  * Reads the engine configuration from the component properties. A missing
+  * endpoint URL is replaced by the default URL; every other option that is
+  * not configured stays <code>null</code>.
+  *
+  * @param ce the {@link ComponentContext}
+  */
+ @SuppressWarnings("unchecked")
+ protected void activate( ComponentContext ce ) throws ConfigurationException, IOException {
+     super.activate(ce);
+
+     Dictionary<String, Object> config = ce.getProperties();
+
+     Object configuredUrl = config.get( SL_URL_KEY );
+     if ( configuredUrl == null ) {
+         spotlightUrl = "http://spotlight.dbpedia.org/rest/annotate";
+     } else {
+         spotlightUrl = (String) configuredUrl;
+     }
+
+     // casting null yields null, so absent options need no special case
+     spotlightSpotter = (String) config.get( SL_SPOTTER );
+     spotlightDisambiguator = (String) config.get( SL_DISAMBIGUATOR );
+     spotlightTypesRestriction = (String) config.get( SL_RESTRICTION );
+     spotlightSparql = (String) config.get( SL_SPARQL );
+     spotlightSupport = (String) config.get( SL_SUPPORT );
+     spotlightConfidence = (String) config.get( SL_CONFIDENCE );
+ }
+
+
+
+ /**
+ * Check if the content can be enhanced
+ * @param ci the {@link ContentItem}
+ */
+ public int canEnhance( ContentItem ci ) throws EngineException {
+ if(ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) != null){
+ return ENHANCE_SYNCHRONOUS;
+ } else {
+ return CANNOT_ENHANCE;
+ }
+ }
+
+
+ /**
+  * Calculate the enhancements by doing a POST request to the DBpedia Spotlight endpoint and processing the results.
+  * Nothing is added to the content item when the request yields no result.
+  *
+  * @param ci the {@link ContentItem}
+  * @throws InvalidContentException if the text of the content item cannot be read
+  * @throws EngineException if {@link #doPostRequest(String)} fails
+  * @throws IllegalStateException if the content item has no blob with a supported mime type
+  */
+ public void computeEnhancements( ContentItem ci ) throws EngineException {
+     Entry<UriRef,Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
+     if(contentPart == null){
+         throw new IllegalStateException("No ContentPart with Mimetype '"
+             + TEXT_PLAIN_MIMETYPE+"' found for ContentItem "+ci.getUri()
+             + ": This is also checked in the canEnhance method! -> This "
+             + "indicated an Bug in the implementation of the "
+             + "EnhancementJobManager!");
+     }
+     String text;
+     try {
+         text = ContentItemHelper.getText(contentPart.getValue());
+     } catch (IOException e) {
+         throw new InvalidContentException(this, ci, e);
+     }
+
+     Collection<DBPSLAnnotation> dbpslGraph = doPostRequest( text );
+     if ( dbpslGraph == null ) {
+         // no result from the service: nothing to add
+         return;
+     }
+
+     //Acquire a write lock on the ContentItem when adding the enhancements
+     ci.getLock().writeLock().lock();
+     try {
+         createEnhancements( dbpslGraph, ci);
+         if (log.isDebugEnabled()) {
+             Serializer serializer = Serializer.getInstance();
+             ByteArrayOutputStream debugStream = new ByteArrayOutputStream();
+             serializer.serialize(debugStream, ci.getMetadata(), "application/rdf+xml");
+             try {
+                 log.debug("DBPedia Spotlight Enhancements:\n{}",debugStream.toString("UTF-8"));
+             } catch (UnsupportedEncodingException e) {
+                 // report through the logger instead of dumping a stack trace to stderr
+                 log.warn("Unable to log the enhancements as UTF-8", e);
+             }
+         }
+     } finally {
+         ci.getLock().writeLock().unlock();
+     }
+ }
+
+
+ /**
+  * Writes the enhancement structures for the given DBPedia Spotlight
+  * annotations into the metadata of the content item.
+  * Every annotation gets its own TextAnnotation. An EntityAnnotation is
+  * created once per annotated resource; further occurrences of the same
+  * resource only add a relation to the existing EntityAnnotation.
+  *
+  * @param occs a Collection of entity information
+  * @param ci the content item
+  */
+ public void createEnhancements( Collection<DBPSLAnnotation> occs, ContentItem ci ) {
+     LiteralFactory literals = LiteralFactory.getInstance();
+     String langString = getMetadataLanguage(ci.getMetadata(), null);
+     // language of the plain literals that quote parts of the content
+     final Language language = (langString == null || langString.isEmpty())
+             ? null
+             : new Language(langString);
+
+     // resource -> EntityAnnotation already created for it
+     HashMap<Resource, UriRef> knownEntities = new HashMap<Resource, UriRef>();
+
+     for (DBPSLAnnotation occ : occs) {
+         UriRef textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
+         MGraph model = ci.getMetadata();
+
+         Literal selectedText = new PlainLiteralImpl(occ.surfaceForm, language);
+         model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, selectedText));
+         model.add(new TripleImpl(textAnnotation, ENHANCER_START,
+                 literals.createTypedLiteral(occ.offset)));
+         model.add(new TripleImpl(textAnnotation, ENHANCER_END,
+                 literals.createTypedLiteral(occ.offset + occ.surfaceForm.length())));
+         // TODO ################## model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(occ.context,language)));
+
+         if (!knownEntities.containsKey(occ.uri)) {
+             // first occurrence of this resource: create the EntityAnnotation
+             UriRef entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
+             knownEntities.put(occ.uri, entityAnnotation);
+             Literal label = new PlainLiteralImpl(occ.surfaceForm, new Language("en"));
+             model.add(new TripleImpl(entityAnnotation, DC_RELATION, textAnnotation));
+             model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, label));
+
+             HashSet<String> typeNames = occ.getTypeNames();
+             if (typeNames != null) {
+                 for (String typeName : typeNames) {
+                     UriRef annotationType = new UriRef(typeName);
+                     model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_TYPE, annotationType));
+                     model.add(new TripleImpl(textAnnotation, DC_TYPE, annotationType));
+                 }
+             }
+             model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, occ.uri));
+         } else {
+             // resource seen before: only link the new TextAnnotation
+             model.add(new TripleImpl(knownEntities.get(occ.uri), DC_RELATION, textAnnotation));
+         }
+     }
+ }
+
+
+
+
+ /**
+ * Sends a POST request to the DBpediaSpotlight endpoint.
+ * @param text a <code>String</code> with the text to be analyzed
+ * @return a <code>Collection<DBPSLAnnotation></code> with the server response
+ * @throws EngineException if the request cannot be sent
+ */
+ public Collection<DBPSLAnnotation> doPostRequest( String text ) throws EngineException {
+ StringBuilder data = new StringBuilder();
+ try {
+ if ( spotlightSpotter != null && !spotlightSpotter.isEmpty() )
+ data.append( URLEncoder.encode( "spotter", "UTF-8" ) + "=" + URLEncoder.encode( spotlightSpotter, "UTF-8" ) + "&" );
+ if ( spotlightDisambiguator != null && !spotlightDisambiguator.isEmpty() )
+ data.append( URLEncoder.encode( "disambiguator", "UTF-8" ) + "=" + URLEncoder.encode( spotlightDisambiguator, "UTF-8" ) + "&" );
+ if ( spotlightTypesRestriction != null && !spotlightTypesRestriction.isEmpty() )
+ data.append( URLEncoder.encode( "types", "UTF-8" ) + "=" + URLEncoder.encode( spotlightTypesRestriction, "UTF-8" ) + "&" );
+ if ( spotlightSupport != null && !spotlightSupport.isEmpty() )
+ data.append( URLEncoder.encode( "support", "UTF-8" ) + "=" + URLEncoder.encode( spotlightSupport, "UTF-8" ) + "&" );
+ if ( spotlightConfidence != null && !spotlightConfidence.isEmpty() )
+ data.append( URLEncoder.encode( "confidence", "UTF-8" ) + "=" + URLEncoder.encode( spotlightConfidence, "UTF-8" ) + "&" );
+ if ( spotlightSparql != null && !spotlightSparql.isEmpty() && spotlightTypesRestriction == null )
+ data.append( URLEncoder.encode( "sparql", "UTF-8" ) + "=" + URLEncoder.encode( spotlightSparql, "UTF-8" ) + "&" );
+ data.append( URLEncoder.encode( "text", "UTF-8" ) + "=" + URLEncoder.encode( text, "UTF-8" ) );
+ } catch (UnsupportedEncodingException e) {
+ throw new EngineException( "Data for the httprequest could not be converted. Error: " + e.getMessage() );
+ }
+
+ HttpURLConnection connection = null;
+ StringBuffer response = new StringBuffer();
+
+ try {
+ //Create connection
+ URL url = new URL( spotlightUrl );
+ connection = ( HttpURLConnection )url.openConnection();
+ connection.setRequestMethod( "POST" );
+ connection.setRequestProperty( "Content-Type", "application/x-www-form-urlencoded" );
+ connection.setRequestProperty( "Accept", "text/xml" );
+
+ connection.setUseCaches( false );
+ connection.setDoInput( true );
+ connection.setDoOutput( true );
+
+ //Send request
+ DataOutputStream wr = new DataOutputStream (
+ connection.getOutputStream ());
+ wr.writeBytes( data.toString() );
+ wr.flush ();
+ wr.close ();
+
+ //Get Response
+ InputStream is = connection.getInputStream();
+ BufferedReader rd = new BufferedReader( new InputStreamReader( is ) );
+ String line;
+ while((line = rd.readLine()) != null) {
+ response.append( line );
+ response.append( '\r' );
+ }
+ rd.close();
+
+ } catch (Exception e) {
+
+ e.printStackTrace();
+ return null;
+
+ } finally {
+
+ if(connection != null) {
+ connection.disconnect();
+ }
+ }
+
+
+ // Parse the response
+ XMLParser xmlParser = new XMLParser();
+ try {
+ Document xmlDoc = xmlParser.loadXMLFromString( response.toString() );
+ NodeList nlist = xmlParser.getElementsByTagName( xmlDoc, "Resource" );
+ Collection<DBPSLAnnotation> annos = this.getAnnotations( nlist );
+
+ return annos;
+ } catch ( Exception e) {
+ throw new EngineException( "Response XML could not be parsed. Error: " + e.getMessage() );
+ }
+ }
+
+
+ /**
+  * This method creates the Collection of Annotations, which the method <code>createEnhancements</code>
+  * adds to the meta data of the content item.
+  * The values are read from the attributes <code>URI</code>, <code>support</code>,
+  * <code>types</code>, <code>surfaceForm</code>, <code>offset</code>,
+  * <code>similarityScore</code> and <code>percentageOfSecondRank</code> of each node.
+  *
+  * @param nList NodeList of all Resources contained in the XML response from DBpedia Spotlight
+  * @return a Collection&lt;DBPSLAnnotation&gt; with all annotations
+  * @throws NumberFormatException if a numeric attribute cannot be parsed
+  */
+ private Collection<DBPSLAnnotation> getAnnotations( NodeList nList ) {
+     Collection<DBPSLAnnotation> dbpslAnnos = new HashSet<DBPSLAnnotation>();
+
+     for (int i = 0; i < nList.getLength(); i++) {
+         DBPSLAnnotation dbpslann = new DBPSLAnnotation();
+         Element node = (Element) nList.item(i);
+         dbpslann.uri = new UriRef( node.getAttribute( "URI" ) );
+         // valueOf parses straight into the boxed field types
+         dbpslann.support = Integer.valueOf( node.getAttribute( "support" ) );
+         dbpslann.types = node.getAttribute( "types" );
+         dbpslann.surfaceForm = node.getAttribute( "surfaceForm" );
+         dbpslann.offset = Integer.valueOf( node.getAttribute( "offset" ) );
+         dbpslann.similarityScore = Double.valueOf( node.getAttribute( "similarityScore" ) );
+         dbpslann.percentageOfSecondRank = Double.valueOf( node.getAttribute( "percentageOfSecondRank" ) );
+
+         dbpslAnnos.add( dbpslann );
+     }
+
+     return dbpslAnnos;
+ }
+
+
+ public Map<String, Object> getServiceProperties() {
+ return Collections.unmodifiableMap(Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder));
+ }
+
+
+ /**
+  * Looks up a <code>DC_LANGUAGE</code> value in the given graph.
+  *
+  * @param model the graph to search
+  * @param subj the subject handed to the filter (this class passes <code>null</code>)
+  * @return the lexical form of the object of the first matching triple,
+  *         or <code>null</code> if there is no such triple
+  */
+ public String getMetadataLanguage(MGraph model, NonLiteral subj) {
+     Iterator<Triple> matches = model.filter(subj, DC_LANGUAGE, null);
+     if (!matches.hasNext()) {
+         return null;
+     }
+     return getLexicalForm(matches.next().getObject());
+ }
+
+ /**
+  * Returns the textual form of a resource.
+  *
+  * @param res the resource, may be <code>null</code>
+  * @return the lexical form for a {@link Literal}, <code>toString()</code>
+  *         for any other resource, <code>null</code> for <code>null</code>
+  */
+ public String getLexicalForm(Resource res) {
+     if (res instanceof Literal) {
+         return ((Literal) res).getLexicalForm();
+     }
+     return res == null ? null : res.toString();
+ }
+
+
+ /**
+  * Overrides the URL of the Spotlight endpoint; used by the test class.
+  *
+  * @param url the url of the Spotlight endpoint
+  */
+ public void setEndpointUrl( String url ) {
+     this.spotlightUrl = url;
+ }
+
+}
Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/XMLParser.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/XMLParser.java?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/XMLParser.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/XMLParser.java Mon Aug 20 12:11:01 2012
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlightannotate;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
+
+
+/**
+ * Parses the XML results given by DBPedia Spotlight.
+ *
+ * @author <a href="mailto:iavor.jelev@babelmonkeys.com">Iavor Jelev</a>
+ */
+public class XMLParser {
+
+    /**
+     * Returns all elements of the document that have the given tag name.
+     *
+     * @param doc the document to search
+     * @param tagName the tag name to look for
+     * @return the matching elements
+     */
+    public NodeList getElementsByTagName( Document doc, String tagName ) {
+        return doc.getElementsByTagName( tagName );
+    }
+
+
+    /**
+     * Parses an XML document held in a string.
+     * NOTE: the string is turned into bytes with the platform default
+     * charset (<code>getBytes()</code>) before it is parsed.
+     *
+     * @param xml the XML text
+     * @return the parsed and normalized document
+     * @throws SAXException if the text is not well-formed XML
+     * @throws IOException if reading the data fails
+     */
+    public Document loadXMLFromString( String xml ) throws SAXException, IOException {
+        // loadXMLFromInputStream already normalizes the document
+        return loadXMLFromInputStream( new ByteArrayInputStream( xml.getBytes() ) );
+    }
+
+
+    /**
+     * Parses an XML document from a stream with a namespace aware parser.
+     * The stream is closed in every case, also when parsing fails.
+     *
+     * @param is the stream to read the XML from
+     * @return the parsed and normalized document
+     * @throws SAXException if the content is not well-formed XML
+     * @throws IOException if reading or closing the stream fails
+     * @throws IllegalStateException if no XML parser can be created
+     */
+    public Document loadXMLFromInputStream( InputStream is ) throws SAXException, IOException {
+        try {
+            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+            factory.setNamespaceAware( true );
+            // the content usually comes from a remote service: do not
+            // resolve external entities it may declare
+            disableFeature( factory, "http://xml.org/sax/features/external-general-entities" );
+            disableFeature( factory, "http://xml.org/sax/features/external-parameter-entities" );
+
+            DocumentBuilder builder;
+            try {
+                builder = factory.newDocumentBuilder();
+            }
+            catch ( ParserConfigurationException ex ) {
+                // the signature cannot carry this exception; fail with the
+                // cause attached instead of running into a null builder
+                throw new IllegalStateException( "Unable to create a namespace aware XML parser", ex );
+            }
+            Document doc = builder.parse( is );
+            doc.getDocumentElement().normalize();
+
+            return doc;
+        } finally {
+            if ( is != null ) {
+                is.close();
+            }
+        }
+    }
+
+
+    /**
+     * Parses an XML document from a file.
+     *
+     * @param filePath path of the XML file
+     * @return the parsed and normalized document
+     * @throws ParserConfigurationException if no XML parser can be created
+     * @throws SAXException if the file is not well-formed XML
+     * @throws IOException if the file cannot be read
+     */
+    public Document loadXMLFromFile( String filePath ) throws ParserConfigurationException, SAXException, IOException {
+        File fXmlFile = new File( filePath );
+        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
+        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
+        Document doc = dBuilder.parse(fXmlFile);
+        doc.getDocumentElement().normalize();
+
+        return doc;
+    }
+
+
+    /** Switches a parser feature off where the parser implementation knows it. */
+    private static void disableFeature( DocumentBuilderFactory factory, String feature ) {
+        try {
+            factory.setFeature( feature, false );
+        }
+        catch ( ParserConfigurationException ignored ) {
+            // best effort: an implementation without this feature keeps its defaults
+        }
+    }
+}
\ No newline at end of file
Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/resources/OSGI-INF/metatype/metatype.properties (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/resources/OSGI-INF/metatype/metatype.properties Mon Aug 20 12:11:01 2012
@@ -0,0 +1,42 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+
+# This file contains localization strings for configuration labels and
+# descriptions as used in the metatype.xml descriptor generated by
+# the maven SCR plugin
+
+stanbol.DBPSpotlightAnnotateEnhancementEngine.name = DBpedia Spotlight Annotate: Named Entity Extraction and Ontology Linking
+stanbol.DBPSpotlightAnnotateEnhancementEngine.description = Find names of people, organization, \
+ places... disambiguate and link them to DBpedia Ontology URIs. This is a complete EnhancementChain, all in one Engine.
+stanbol.DBPSpotlightAnnotateEnhancementEngine.url.name = Spotlight URL
+stanbol.DBPSpotlightAnnotateEnhancementEngine.url.description = The URL which will be used for the request
+stanbol.DBPSpotlightAnnotateEnhancementEngine.spotter.name = Spotter
+stanbol.DBPSpotlightAnnotateEnhancementEngine.spotter.description = The algorithm which will be used for Spotting \
+ (aka Term Recognition). Currently available: NER, LingPipeSpotter, OpenNLPChunkerSpotter, Kea
+stanbol.DBPSpotlightAnnotateEnhancementEngine.disambiguator.name = Disambiguator
+stanbol.DBPSpotlightAnnotateEnhancementEngine.disambiguator.description = The algorithm used for ranking of senses \
+ based on context. Currently available: Document, Occurrences
+stanbol.DBPSpotlightAnnotateEnhancementEngine.types.name = Types Restriction
+stanbol.DBPSpotlightAnnotateEnhancementEngine.types.description = The DBpedia Ontology types you wish to restrict your results to
+stanbol.DBPSpotlightAnnotateEnhancementEngine.sparql.name = Sparql
+stanbol.DBPSpotlightAnnotateEnhancementEngine.sparql.description = Restrict the result with SPARQL
+stanbol.DBPSpotlightAnnotateEnhancementEngine.support.name = Support
+stanbol.DBPSpotlightAnnotateEnhancementEngine.support.description = Filter the results based on a support metric
+stanbol.DBPSpotlightAnnotateEnhancementEngine.confidence.name = Confidence
+stanbol.DBPSpotlightAnnotateEnhancementEngine.confidence.description = Filter the results based on a confidence metric
Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/core/DBPSpotlightAnnotateEnhancementTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/core/DBPSpotlightAnnotateEnhancementTest.java?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/core/DBPSpotlightAnnotateEnhancementTest.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/core/DBPSpotlightAnnotateEnhancementTest.java Mon Aug 20 12:11:01 2012
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlightannotate.core;
+
+import java.util.Collection;
+
+import org.apache.stanbol.enhancer.engines.dbpspotlightannotate.DBPSLAnnotation;
+import org.apache.stanbol.enhancer.engines.dbpspotlightannotate.DBPSpotlightAnnotateEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.osgi.service.cm.ConfigurationException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This class provides a JUnit test for DBpedia Spotlight Annotate EnhancementEngine.
+ * The test sends a request to the configured Spotlight endpoint.
+ * @author Iavor Jelev, babelmonkeys / GzEvD
+ */
+public class DBPSpotlightAnnotateEnhancementTest {
+
+    /**
+     * This contains the logger.
+     */
+    private static final Logger LOG = LoggerFactory.getLogger(DBPSpotlightAnnotateEnhancementTest.class);
+    /** Endpoint under test; can be overridden with a system property named by SL_URL_KEY. */
+    private static final String SPL_URL = System.getProperty(
+            DBPSpotlightAnnotateEnhancementEngine.SL_URL_KEY, "http://spotlight.dbpedia.org/rest/annotate");
+    private static final String TEST_TEXT = "President Obama is meeting Angela Merkel in Berlin on Monday";
+    private static DBPSpotlightAnnotateEnhancementEngine dbpslight;
+
+    @BeforeClass
+    public static void oneTimeSetup() throws ConfigurationException {
+        dbpslight = new DBPSpotlightAnnotateEnhancementEngine();
+        dbpslight.setEndpointUrl( SPL_URL );
+    }
+
+
+    /** Sends the test text to the endpoint and expects at least one entity back. */
+    @Test
+    public void testEntityExtraction() {
+        try {
+            Collection<DBPSLAnnotation> entities = dbpslight.doPostRequest( TEST_TEXT );
+            // the engine returns null when the request itself failed
+            Assert.assertNotNull("No response received from " + SPL_URL, entities);
+            LOG.info("Found entities: {}",entities.size());
+            LOG.debug("Entities:\n{}",entities);
+            Assert.assertFalse("No entities were found!", entities.isEmpty());
+        } catch (EngineException e) {
+            Assert.fail("An EngineException occurred! The message was: " + e.getMessage());
+        }
+    }
+
+}
Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/resources/README
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/resources/README?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/resources/README (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/resources/README Mon Aug 20 12:11:01 2012
@@ -0,0 +1,15 @@
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
Propchange: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Aug 20 12:11:01 2012
@@ -0,0 +1,7 @@
+target
+
+.settings
+
+.classpath
+
+.project
Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/README.md
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/README.md?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/README.md (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/README.md Mon Aug 20 12:11:01 2012
@@ -0,0 +1,96 @@
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements. See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+# LangId: Language Identification Enhancement Engine
+
+The **LangId** engine determines the language of text.
+
+## Technical Description
+
+The provided engine is based on the language identifier of [Apache Tika](http://tika.apache.org/).
+The text to be checked must be provided in plain text format in one of two forms:
+
+* as a plain text content item
+* in the content item's metadata, as the string value of the property
+
+ <pre><code>http://www.semanticdesktop.org/ontologies/2007/01/19/nie#plainTextContent</code></pre>
+
+The result of language identification is added as a TextAnnotation to the content item's metadata, as the string value of the property
+
+ http://purl.org/dc/terms/language
+
+This RDF snippet illustrates the output:
+
+ <fise:TextAnnotation rdf:about="urn:enhancement-a147957b-41f9-58f7-bbf1-b880b3aa4b49">
+ <dc:language>en</dc:language>
+ <dc:creator>org.apache.stanbol.enhancer.engines.langid.LangIdEnhancementEngine</dc:creator>
+ </fise:TextAnnotation>
+
+
+By default the language identifier distinguishes the languages listed below. After the colon the value of the language label in the metadata is given.
+
+* German: de
+* English: en
+* Estonian: et
+* French: fr
+* Spanish: es
+* Italian: it
+* Swedish: sv
+* Polish: pl
+* Dutch: nl
+* Norwegian: no
+* Finnish: fi
+* Greek: el
+* Danish: da
+* Hungarian: hu
+* Icelandic: is
+* Lithuanian: lt
+* Portuguese: pt
+* Russian: ru
+* Thai: th
+
+Additional language models can be created as Tika `LanguageProfile` instances (class `org.apache.tika.language.LanguageProfile`).
+
+## Configuration options
+
+* <pre><code>org.apache.stanbol.enhancer.engines.langid.probe-length</code></pre>
+
+ an integer specifying how many characters will be used for
+ identification. A value of 0 or below means to use the complete
+ text. Otherwise only a substring of the specified length taken from the
+ middle of the text will be used. The default value is 400 characters.
+
+## Usage
+
+Assuming that the Stanbol endpoint with the full launcher is running at
+
+ http://localhost:8080
+
+and the engine is activated, commands like the following can be used from
+the command line to submit a text file as a content item:
+
+* stateless interface
+
+ curl -i -X POST -H "Content-Type:text/plain" -T testfile.txt http://localhost:8080/engines
+
+* stateful interface
+
+ curl -i -X PUT -H "Content-Type:text/plain" -T testfile.txt http://localhost:8080/contenthub/content/someFileId
+
+Alternatively, the Stanbol web interface can be used for submitting documents
+and viewing the metadata at
+
+ http://localhost:8080/contenthub
+
Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/pom.xml?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/pom.xml (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/pom.xml Mon Aug 20 12:11:01 2012
@@ -0,0 +1,121 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <artifactId>org.apache.stanbol.enhancer.parent</artifactId>
+ <groupId>org.apache.stanbol</groupId>
+ <version>0.9.0-incubating</version>
+ <relativePath>../../parent</relativePath>
+ </parent>
+
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.engines.dbpspotlightcandidates</artifactId>
+ <packaging>bundle</packaging>
+
+ <name>Apache Stanbol Enhancer Enhancement Engine : DBPedia Spotlight Candidates</name>
+ <description>An enhancement engine that associates candidate DBpedia URIs with spotted surface forms</description>
+
+ <inceptionYear>2010</inceptionYear>
+
+ <!--scm>
+ <connection>
+ scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
+ </connection>
+ <developerConnection>
+ scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
+ </developerConnection>
+ <url>http://incubator.apache.org/stanbol/</url>
+ </scm-->
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-bundle-plugin</artifactId>
+ <extensions>true</extensions>
+ <configuration>
+ <instructions>
+ <Export-Package>
+ org.apache.stanbol.enhancer.engines.dbpspotlightcandidates;version=${project.version}
+ </Export-Package>
+ <Embed-Dependency>
+ </Embed-Dependency>
+ </instructions>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-scr-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ <configuration>
+ <excludes>
+ <!-- AL20 licensed files: See src/test/resources/README -->
+ <exclude>src/test/resources/en.txt</exclude>
+ </excludes>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <properties>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-core</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>org.apache.felix.scr.annotations</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>rdf.core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+</project>
Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/license/THIRD-PARTY.properties?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/license/THIRD-PARTY.properties (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/license/THIRD-PARTY.properties Mon Aug 20 12:11:01 2012
@@ -0,0 +1,17 @@
+# Generated by org.codehaus.mojo.license.AddThirdPartyMojo
+#-------------------------------------------------------------------------------
+# Already used licenses in project :
+# - Apache License
+# - Common Development and Distribution License (CDDL) v1.0
+# - Common Public License Version 1.0
+# - ICU License
+# - MIT License
+# - The Apache Software License, Version 2.0
+#-------------------------------------------------------------------------------
+# Please fill the missing licenses for dependencies :
+#
+#
+#Wed Feb 15 19:06:13 CET 2012
+javax.servlet--servlet-api--2.4=Common Development And Distribution License (CDDL), Version 1.0
+org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
+org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0
Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/CandidateResource.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/CandidateResource.java?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/CandidateResource.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/CandidateResource.java Mon Aug 20 12:11:01 2012
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlightcandidates;
+
+//import org.apache.clerezza.rdf.core.Resource;
+
+/**
+ * Stores one candidate resource given by DBPedia Spotlight Candidates.
+ * <p>
+ * This is a plain mutable holder: every field is public and no accessors
+ * are provided. Instances are collected per surface form in
+ * {@code DBPSLSurfaceForm.resources}.
+ *
+ * @author <a href="mailto:iavor.jelev@babelmonkeys.com">Iavor Jelev</a>
+ */
+public class CandidateResource {
+
+    /** Label of the candidate resource. */
+    public String label;
+    /** URI of the candidate resource. */
+    public String uri;
+    // Score values attached to the candidate. NOTE(review): presumably these
+    // mirror the fields of the same name in the Spotlight response - confirm
+    // against the parsing code.
+    public double contextualScore;
+    public double percentageOfSecondRank;
+    public double support;
+    public double priorScore;
+    public double finalScore;
+
+    /**
+     * Returns all fields as a bracketed, comma-separated list of
+     * {@code name=value} pairs, each labelled with its own field name.
+     * <p>
+     * Every value is rendered with {@code %s}. The score fields are
+     * {@code double}s, for which the integral {@code %d} conversion would
+     * throw {@link java.util.IllegalFormatConversionException}.
+     *
+     * @return the textual representation of this candidate
+     */
+    @Override
+    public String toString() {
+        return String.format(
+                "[label=%s, uri=%s, contextualScore=%s, percentageOfSecondRank=%s, "
+                        + "support=%s, priorScore=%s, finalScore=%s]",
+                label, uri, contextualScore, percentageOfSecondRank, support, priorScore, finalScore);
+    }
+}
Added: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/DBPSLSurfaceForm.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/DBPSLSurfaceForm.java?rev=1374984&view=auto
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/DBPSLSurfaceForm.java (added)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/DBPSLSurfaceForm.java Mon Aug 20 12:11:01 2012
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dbpspotlightcandidates;
+
+import java.util.ArrayList;
+import java.util.List;
+
+//import org.apache.clerezza.rdf.core.Resource;
+
+/**
+ * Stores one surface form given by DBPedia Spotlight Candidates, together
+ * with the candidate resources collected for it.
+ * <p>
+ * This is a plain mutable holder: every field is public and no accessors
+ * are provided.
+ *
+ * @author <a href="mailto:iavor.jelev@babelmonkeys.com">Iavor Jelev</a>
+ */
+public class DBPSLSurfaceForm {
+
+    /** Name of the surface form. */
+    public String name;
+    /** Type of the surface form. */
+    public String type;
+    // NOTE(review): presumably the character offset of the surface form in
+    // the analysed text - confirm against the parsing code. Boxed, so it may
+    // be null until assigned.
+    public Integer offset;
+    /** Candidate resources for this surface form; starts out empty, never null. */
+    public List<CandidateResource> resources = new ArrayList<CandidateResource>();
+
+    /**
+     * Returns the name, offset and type as a bracketed, comma-separated list
+     * of {@code name=value} pairs. The {@link #resources} list is not
+     * included.
+     * <p>
+     * The offset is rendered with {@code %d}, which prints {@code null} for
+     * an unset offset. {@code %i} is not a {@link java.util.Formatter}
+     * conversion and would throw
+     * {@link java.util.UnknownFormatConversionException}.
+     *
+     * @return the textual representation of this surface form
+     */
+    @Override
+    public String toString() {
+        return String.format("[name=%s, offset=%d, type=%s]", name, offset, type);
+    }
+}