You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/08/20 15:48:32 UTC
svn commit: r1375018 [1/3] - in
/incubator/stanbol/branches/dbpedia-spotlight-engines/engines:
dbpspotlightannotate/
dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/
dbpspotlightannotate/src/test/java/org/apa...
Author: rwesten
Date: Mon Aug 20 13:48:31 2012
New Revision: 1375018
URL: http://svn.apache.org/viewvc?rev=1375018&view=rev
Log:
STANBOL-706: Formatted Source Code; Removed README.md files that were actually copied versions from another engine. NOTE: This does not contain any change in the source other than formatting
Removed:
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/README.md
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/README.md
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/README.md
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/README.md
Modified:
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/pom.xml
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSLAnnotation.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSpotlightAnnotateEnhancementEngine.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/XMLParser.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/core/DBPSpotlightAnnotateEnhancementTest.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/pom.xml
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/CandidateResource.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/DBPSLSurfaceForm.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/DBPSpotlightCandidatesEnhancementEngine.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/XMLParser.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/core/DBPSpotlightCandidatesEnhancementTest.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/pom.xml
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightdisambiguate/DBPSLAnnotation.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightdisambiguate/DBPSpotlightDisambiguateEnhancementEngine.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightdisambiguate/XMLParser.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightdisambiguate/core/DBPSpotlightDisambiguateEnhancementTest.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightdisambiguate/src/test/resources/spots.xml
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/pom.xml
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/DBPSLSurfaceForm.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/DBPSpotlightSpotEnhancementEngine.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/XMLParser.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightspot/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightspot/core/DBPSpotlightSpotEnhancementTest.java
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/geonames/pom.xml
incubator/stanbol/branches/dbpedia-spotlight-engines/engines/langdetect/ (props changed)
Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/pom.xml?rev=1375018&r1=1375017&r2=1375018&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/pom.xml (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/pom.xml Mon Aug 20 13:48:31 2012
@@ -1,122 +1,110 @@
<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
- <modelVersion>4.0.0</modelVersion>
+ <modelVersion>4.0.0</modelVersion>
- <parent>
- <artifactId>org.apache.stanbol.enhancer.parent</artifactId>
- <groupId>org.apache.stanbol</groupId>
- <version>0.9.0-incubating</version>
- <relativePath>../../parent</relativePath>
- </parent>
-
- <groupId>org.apache.stanbol</groupId>
- <artifactId>org.apache.stanbol.enhancer.engines.dbpspotlightannotate</artifactId>
- <packaging>bundle</packaging>
+ <parent>
+ <artifactId>org.apache.stanbol.enhancer.parent</artifactId>
+ <groupId>org.apache.stanbol</groupId>
+ <version>0.9.0-incubating</version>
+ <relativePath>../../parent</relativePath>
+ </parent>
+
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.engines.dbpspotlightannotate</artifactId>
+ <packaging>bundle</packaging>
- <name>Apache Stanbol Enhancer Enhancement Engine : DBPedia Spotlight Annotate</name>
- <description>just tests the Stanbol Engine Import
+ <name>Apache Stanbol Enhancer Enhancement Engine : DBPedia Spotlight Annotate</name>
+ <description>just tests the Stanbol Engine Import
</description>
- <inceptionYear>2010</inceptionYear>
+ <inceptionYear>2010</inceptionYear>
- <!--scm>
- <connection>
- scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
- </connection>
- <developerConnection>
- scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
- </developerConnection>
- <url>http://incubator.apache.org/stanbol/</url>
- </scm-->
-
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.felix</groupId>
- <artifactId>maven-bundle-plugin</artifactId>
- <extensions>true</extensions>
- <configuration>
- <instructions>
- <Export-Package>
- org.apache.stanbol.enhancer.engines.dbpspotlightannotate;version=${project.version}
- </Export-Package>
- <Embed-Dependency>
- </Embed-Dependency>
- </instructions>
- </configuration>
- </plugin>
- <plugin>
- <groupId>org.apache.felix</groupId>
- <artifactId>maven-scr-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>org.apache.rat</groupId>
- <artifactId>apache-rat-plugin</artifactId>
- <configuration>
- <excludes>
- <!-- AL20 licensed files: See src/test/resources/README -->
- <exclude>src/test/resources/en.txt</exclude>
- </excludes>
- </configuration>
- </plugin>
- </plugins>
- </build>
-
- <properties>
- <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
- </properties>
-
- <dependencies>
- <dependency>
- <groupId>org.apache.stanbol</groupId>
- <artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
- </dependency>
-
- <dependency>
- <groupId>org.apache.tika</groupId>
- <artifactId>tika-core</artifactId>
- </dependency>
-
- <dependency>
- <groupId>org.apache.felix</groupId>
- <artifactId>org.apache.felix.scr.annotations</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.clerezza</groupId>
- <artifactId>rdf.core</artifactId>
- </dependency>
- <dependency>
- <groupId>commons-io</groupId>
- <artifactId>commons-io</artifactId>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-api</artifactId>
- </dependency>
-
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <scope>test</scope>
- </dependency>
- </dependencies>
+ <!--scm> <connection> scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
+ </connection> <developerConnection> scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
+ </developerConnection> <url>http://incubator.apache.org/stanbol/</url> </scm -->
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-bundle-plugin</artifactId>
+ <extensions>true</extensions>
+ <configuration>
+ <instructions>
+ <Export-Package>
+ org.apache.stanbol.enhancer.engines.dbpspotlightannotate;version=${project.version}
+ </Export-Package>
+ <Embed-Dependency>
+ </Embed-Dependency>
+ </instructions>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-scr-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ <configuration>
+ <excludes>
+ <!-- AL20 licensed files: See src/test/resources/README -->
+ <exclude>src/test/resources/en.txt</exclude>
+ </excludes>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <properties>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-core</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>org.apache.felix.scr.annotations</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>rdf.core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
</project>
Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSLAnnotation.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSLAnnotation.java?rev=1375018&r1=1375017&r2=1375018&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSLAnnotation.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSLAnnotation.java Mon Aug 20 13:48:31 2012
@@ -22,7 +22,7 @@ import org.apache.clerezza.rdf.core.Reso
/**
* Contains a result given by DBPedia Spotlight..
- *
+ *
* @author <a href="mailto:iavor.jelev@babelmonkeys.com">Iavor Jelev</a>
*/
public class DBPSLAnnotation {
@@ -34,26 +34,27 @@ public class DBPSLAnnotation {
public Integer offset;
public Double similarityScore;
public Double percentageOfSecondRank;
-
- public HashSet<String> getTypeNames() {
- if (types != null) {
- HashSet<String> t = new HashSet<String>();
- String[] typex = types.split(",");
- for ( String type: typex ) {
- // make the returned types referenceable
- String deref = type.replace( "DBpedia:", "dbp-ont:")
- .replace( "Freebase:", "http://www.freebase.com/schema")
- .replace( "Schema:", "http://www.schema.org/");
- t.add(deref);
- }
- return t;
- }
- return null;
- }
+ public HashSet<String> getTypeNames() {
+ if (types != null) {
+ HashSet<String> t = new HashSet<String>();
+ String[] typex = types.split(",");
+ for (String type : typex) {
+ // make the returned types referenceable
+ String deref = type.replace("DBpedia:", "dbp-ont:")
+ .replace("Freebase:", "http://www.freebase.com/schema")
+ .replace("Schema:", "http://www.schema.org/");
+ t.add(deref);
+ }
+ return t;
+ }
+ return null;
+ }
public String toString() {
- return String.format("[uri=%s, support=%i, types=%s, surfaceForm=\"%s\", offset=%i, similarityScore=%d, percentageOfSecondRank=%d]",
- uri, support, types, surfaceForm, offset, similarityScore, percentageOfSecondRank);
+ return String
+ .format("[uri=%s, support=%i, types=%s, surfaceForm=\"%s\", offset=%i, similarityScore=%d, percentageOfSecondRank=%d]",
+ uri, support, types, surfaceForm, offset,
+ similarityScore, percentageOfSecondRank);
}
}
Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSpotlightAnnotateEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSpotlightAnnotateEnhancementEngine.java?rev=1375018&r1=1375017&r2=1375018&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSpotlightAnnotateEnhancementEngine.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/DBPSpotlightAnnotateEnhancementEngine.java Mon Aug 20 13:48:31 2012
@@ -79,266 +79,319 @@ import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
/**
- * {@link DBPSpotlightAnnotateEnhancementEngine} provides functionality to enhance a document
- * using the DBpedia Spotlight /annotate REST endpoint
+ * {@link DBPSpotlightAnnotateEnhancementEngine} provides functionality to
+ * enhance a document using the DBpedia Spotlight /annotate REST endpoint
+ *
* @author Iavor Jelev, Babelmonkeys (GzEvD)
*/
-@Component(
- metatype = true,
- immediate = true,
- label = "%stanbol.DBPSpotlightAnnotateEnhancementEngine.name",
- description = "%stanbol.DBPSpotlightAnnotateEnhancementEngine.description")
+@Component(metatype = true, immediate = true, label = "%stanbol.DBPSpotlightAnnotateEnhancementEngine.name", description = "%stanbol.DBPSpotlightAnnotateEnhancementEngine.description")
@Service
-@Properties(value={
- @Property(name=EnhancementEngine.PROPERTY_NAME,value="dbpspotlightannotate")
-})
-public class DBPSpotlightAnnotateEnhancementEngine
- extends AbstractEnhancementEngine<IOException,RuntimeException>
- implements EnhancementEngine, ServiceProperties {
-
- /**
- * a configurable value of the text segment length to check
- */
- @Property(value = "http://spotlight.dbpedia.org/rest/annotate")
- public static final String SL_URL_KEY = "stanbol.DBPSpotlightAnnotateEnhancementEngine.url";
-
- @Property(value = "NESpotter")
- public static final String SL_SPOTTER = "stanbol.DBPSpotlightAnnotateEnhancementEngine.spotter";
-
- @Property(value = "")
- public static final String SL_DISAMBIGUATOR = "stanbol.DBPSpotlightAnnotateEnhancementEngine.disambiguator";
-
- @Property()
- public static final String SL_RESTRICTION = "stanbol.DBPSpotlightAnnotateEnhancementEngine.types";
-
- @Property()
- public static final String SL_SPARQL = "stanbol.DBPSpotlightAnnotateEnhancementEngine.sparql";
-
- @Property()
- public static final String SL_SUPPORT = "stanbol.DBPSpotlightAnnotateEnhancementEngine.support";
-
- @Property()
- public static final String SL_CONFIDENCE = "stanbol.DBPSpotlightAnnotateEnhancementEngine.confidence";
-
-
- /**
- * The default value for the Execution of this Engine.
- */
- public static final Integer defaultOrder = ORDERING_CONTENT_EXTRACTION - 27;
-
- /** This contains the only MIME type directly supported by this enhancement engine. */
- private static final String TEXT_PLAIN_MIMETYPE = "text/plain";
- /** Set containing the only supported mime type {@link #TEXT_PLAIN_MIMETYPE} */
- private static final Set<String> SUPPORTED_MIMTYPES = Collections.singleton(TEXT_PLAIN_MIMETYPE);
-
- /** holds the logger. */
- private static final Logger log = LoggerFactory.getLogger(DBPSpotlightAnnotateEnhancementEngine.class);
-
- /** holds the url of the Spotlight REST endpoint */
- private String spotlightUrl;
- /** holds the chosen of spotter to be used */
- private String spotlightSpotter;
- /** holds the chosen of disambiguator to be used */
- private String spotlightDisambiguator;
- /** holds the type restriction for the results, if the user wishes one */
- private String spotlightTypesRestriction;
- /** holds the chosen minimal support value */
- private String spotlightSupport;
- /** holds the chosen minimal confidence value */
- private String spotlightConfidence;
- /** holds the sparql restriction for the results, if the user wishes one */
- private String spotlightSparql;
-
-
- /**
- * Initialize all parameters from the configuration panel, or with their default values
- * @param ce the {@link ComponentContext}
- */
- @SuppressWarnings("unchecked")
- protected void activate( ComponentContext ce ) throws ConfigurationException, IOException {
-
- super.activate(ce);
-
- Dictionary<String, Object> properties = ce.getProperties();
- spotlightUrl = properties.get( SL_URL_KEY ) == null ? "http://spotlight.dbpedia.org/rest/annotate" : (String) properties.get( SL_URL_KEY );
- spotlightSpotter = properties.get( SL_SPOTTER ) == null ? null : (String) properties.get( SL_SPOTTER );
- spotlightDisambiguator = properties.get( SL_DISAMBIGUATOR ) == null ? null : (String) properties.get( SL_DISAMBIGUATOR );
- spotlightTypesRestriction = properties.get( SL_RESTRICTION ) == null ? null : (String) properties.get( SL_RESTRICTION );
- spotlightSparql = properties.get( SL_SPARQL ) == null ? null : (String) properties.get( SL_SPARQL );
- spotlightSupport = properties.get( SL_SUPPORT ) == null ? null : (String) properties.get( SL_SUPPORT );
- spotlightConfidence = properties.get( SL_CONFIDENCE ) == null ? null : (String) properties.get( SL_CONFIDENCE );
- }
-
-
-
- /**
- * Check if the content can be enhanced
- * @param ci the {@link ContentItem}
- */
- public int canEnhance( ContentItem ci ) throws EngineException {
- if(ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) != null){
- return ENHANCE_SYNCHRONOUS;
- } else {
- return CANNOT_ENHANCE;
- }
- }
-
-
- /**
- * Calculate the enhancements by doing a POST request to the DBpedia Spotlight endpoint and processing the results
- * @param ci the {@link ContentItem}
- */
- public void computeEnhancements( ContentItem ci ) throws EngineException {
- Entry<UriRef,Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
- if(contentPart == null){
- throw new IllegalStateException("No ContentPart with Mimetype '"
- + TEXT_PLAIN_MIMETYPE+"' found for ContentItem "+ci.getUri()
- + ": This is also checked in the canEnhance method! -> This "
- + "indicated an Bug in the implementation of the "
- + "EnhancementJobManager!");
- }
- String text = "";
- try {
- text = ContentItemHelper.getText(contentPart.getValue());
- } catch (IOException e) {
- throw new InvalidContentException(this, ci, e);
- }
-
- Collection<DBPSLAnnotation> dbpslGraph = doPostRequest( text );
- if ( dbpslGraph != null ) {
- //Acquire a write lock on the ContentItem when adding the enhancements
- ci.getLock().writeLock().lock();
- try {
- createEnhancements( dbpslGraph, ci);
- if (log.isDebugEnabled()) {
- Serializer serializer = Serializer.getInstance();
- ByteArrayOutputStream debugStream = new ByteArrayOutputStream();
- serializer.serialize(debugStream, ci.getMetadata(), "application/rdf+xml");
- try {
- log.debug("DBPedia Spotlight Enhancements:\n{}",debugStream.toString("UTF-8"));
- } catch (UnsupportedEncodingException e) {
- e.printStackTrace();
- }
- }
- } finally {
- ci.getLock().writeLock().unlock();
- }
- }
- }
-
-
- /**
- * This generates enhancement structures for the entities from DBPedia Spotlight
- * and adds them to the content item's metadata.
- * For each entity a TextAnnotation and an EntityAnnotation are created.
- * An EntityAnnotation can relate to several TextAnnotations.
- *
- * @param occs a Collection of entity information
- * @param ci the content item
- */
- public void createEnhancements( Collection<DBPSLAnnotation> occs, ContentItem ci ) {
- LiteralFactory literalFactory = LiteralFactory.getInstance();
- final Language language; // used for plain literals representing parts fo the content
- String langString = getMetadataLanguage(ci.getMetadata(), null);
-
- if(langString != null && !langString.isEmpty()){
- language = new Language(langString);
- } else {
- language = null;
- }
-
- HashMap<Resource, UriRef> entityAnnotationMap = new HashMap<Resource, UriRef>();
-
- for (DBPSLAnnotation occ : occs) {
- UriRef textAnnotation = EnhancementEngineHelper.createTextEnhancement(
- ci, this);
- MGraph model = ci.getMetadata();
- model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(occ.surfaceForm,language)));
- model.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(occ.offset)));
- model.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(occ.offset + occ.surfaceForm.length())));
- // TODO ################## model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(occ.context,language)));
- //create EntityAnnotation only once but add a reference to the textAnnotation
- if (entityAnnotationMap.containsKey(occ.uri)) {
- model.add(new TripleImpl(entityAnnotationMap.get(occ.uri), DC_RELATION, textAnnotation));
- } else {
- UriRef entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
- entityAnnotationMap.put(occ.uri, entityAnnotation);
- Literal label = new PlainLiteralImpl( occ.surfaceForm, new Language("en"));
- model.add(new TripleImpl(entityAnnotation, DC_RELATION, textAnnotation));
- model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, label));
-
- HashSet<String> t = occ.getTypeNames();
- if ( t != null ) {
- Iterator<String> it = t.iterator();
- while ( it.hasNext() ) {
- UriRef annotationType = new UriRef( it.next() );
- model.add( new TripleImpl( entityAnnotation, ENHANCER_ENTITY_TYPE, annotationType ) );
- model.add( new TripleImpl( textAnnotation, DC_TYPE, annotationType ) );
- }
- }
- model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, occ.uri));
- }
- }
- }
-
-
-
-
- /**
- * Sends a POST request to the DBpediaSpotlight endpoint.
- * @param text a <code>String</code> with the text to be analyzed
- * @return a <code>Collection<DBPSLAnnotation></code> with the server response
- * @throws EngineException if the request cannot be sent
- */
- public Collection<DBPSLAnnotation> doPostRequest( String text ) throws EngineException {
+@Properties(value = { @Property(name = EnhancementEngine.PROPERTY_NAME, value = "dbpspotlightannotate") })
+public class DBPSpotlightAnnotateEnhancementEngine extends
+ AbstractEnhancementEngine<IOException, RuntimeException> implements
+ EnhancementEngine, ServiceProperties {
+
+ /**
+ * a configurable value of the text segment length to check
+ */
+ @Property(value = "http://spotlight.dbpedia.org/rest/annotate")
+ public static final String SL_URL_KEY = "stanbol.DBPSpotlightAnnotateEnhancementEngine.url";
+
+ @Property(value = "NESpotter")
+ public static final String SL_SPOTTER = "stanbol.DBPSpotlightAnnotateEnhancementEngine.spotter";
+
+ @Property(value = "")
+ public static final String SL_DISAMBIGUATOR = "stanbol.DBPSpotlightAnnotateEnhancementEngine.disambiguator";
+
+ @Property()
+ public static final String SL_RESTRICTION = "stanbol.DBPSpotlightAnnotateEnhancementEngine.types";
+
+ @Property()
+ public static final String SL_SPARQL = "stanbol.DBPSpotlightAnnotateEnhancementEngine.sparql";
+
+ @Property()
+ public static final String SL_SUPPORT = "stanbol.DBPSpotlightAnnotateEnhancementEngine.support";
+
+ @Property()
+ public static final String SL_CONFIDENCE = "stanbol.DBPSpotlightAnnotateEnhancementEngine.confidence";
+
+ /**
+ * The default value for the Execution of this Engine.
+ */
+ public static final Integer defaultOrder = ORDERING_CONTENT_EXTRACTION - 27;
+
+ /**
+ * This contains the only MIME type directly supported by this enhancement
+ * engine.
+ */
+ private static final String TEXT_PLAIN_MIMETYPE = "text/plain";
+ /** Set containing the only supported mime type {@link #TEXT_PLAIN_MIMETYPE} */
+ private static final Set<String> SUPPORTED_MIMTYPES = Collections
+ .singleton(TEXT_PLAIN_MIMETYPE);
+
+ /** holds the logger. */
+ private static final Logger log = LoggerFactory
+ .getLogger(DBPSpotlightAnnotateEnhancementEngine.class);
+
+ /** holds the url of the Spotlight REST endpoint */
+ private String spotlightUrl;
+ /** holds the chosen of spotter to be used */
+ private String spotlightSpotter;
+ /** holds the chosen of disambiguator to be used */
+ private String spotlightDisambiguator;
+ /** holds the type restriction for the results, if the user wishes one */
+ private String spotlightTypesRestriction;
+ /** holds the chosen minimal support value */
+ private String spotlightSupport;
+ /** holds the chosen minimal confidence value */
+ private String spotlightConfidence;
+ /** holds the sparql restriction for the results, if the user wishes one */
+ private String spotlightSparql;
+
+ /**
+ * Initialize all parameters from the configuration panel, or with their
+ * default values
+ *
+ * @param ce
+ * the {@link ComponentContext}
+ */
+ @SuppressWarnings("unchecked")
+ protected void activate(ComponentContext ce) throws ConfigurationException,
+ IOException {
+
+ super.activate(ce);
+
+ Dictionary<String, Object> properties = ce.getProperties();
+ spotlightUrl = properties.get(SL_URL_KEY) == null ? "http://spotlight.dbpedia.org/rest/annotate"
+ : (String) properties.get(SL_URL_KEY);
+ spotlightSpotter = properties.get(SL_SPOTTER) == null ? null
+ : (String) properties.get(SL_SPOTTER);
+ spotlightDisambiguator = properties.get(SL_DISAMBIGUATOR) == null ? null
+ : (String) properties.get(SL_DISAMBIGUATOR);
+ spotlightTypesRestriction = properties.get(SL_RESTRICTION) == null ? null
+ : (String) properties.get(SL_RESTRICTION);
+ spotlightSparql = properties.get(SL_SPARQL) == null ? null
+ : (String) properties.get(SL_SPARQL);
+ spotlightSupport = properties.get(SL_SUPPORT) == null ? null
+ : (String) properties.get(SL_SUPPORT);
+ spotlightConfidence = properties.get(SL_CONFIDENCE) == null ? null
+ : (String) properties.get(SL_CONFIDENCE);
+ }
+
+ /**
+ * Check if the content can be enhanced
+ *
+ * @param ci
+ * the {@link ContentItem}
+ */
+ public int canEnhance(ContentItem ci) throws EngineException {
+ if (ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) != null) {
+ return ENHANCE_SYNCHRONOUS;
+ } else {
+ return CANNOT_ENHANCE;
+ }
+ }
+
+ /**
+ * Calculate the enhancements by doing a POST request to the DBpedia
+ * Spotlight endpoint and processing the results
+ *
+ * @param ci
+ * the {@link ContentItem}
+ */
+ public void computeEnhancements(ContentItem ci) throws EngineException {
+ Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(ci,
+ SUPPORTED_MIMTYPES);
+ if (contentPart == null) {
+ throw new IllegalStateException(
+ "No ContentPart with Mimetype '"
+ + TEXT_PLAIN_MIMETYPE
+ + "' found for ContentItem "
+ + ci.getUri()
+ + ": This is also checked in the canEnhance method! -> This "
+ + "indicated an Bug in the implementation of the "
+ + "EnhancementJobManager!");
+ }
+ String text = "";
+ try {
+ text = ContentItemHelper.getText(contentPart.getValue());
+ } catch (IOException e) {
+ throw new InvalidContentException(this, ci, e);
+ }
+
+ Collection<DBPSLAnnotation> dbpslGraph = doPostRequest(text);
+ if (dbpslGraph != null) {
+ // Acquire a write lock on the ContentItem when adding the
+ // enhancements
+ ci.getLock().writeLock().lock();
+ try {
+ createEnhancements(dbpslGraph, ci);
+ if (log.isDebugEnabled()) {
+ Serializer serializer = Serializer.getInstance();
+ ByteArrayOutputStream debugStream = new ByteArrayOutputStream();
+ serializer.serialize(debugStream, ci.getMetadata(),
+ "application/rdf+xml");
+ try {
+ log.debug("DBPedia Spotlight Enhancements:\n{}",
+ debugStream.toString("UTF-8"));
+ } catch (UnsupportedEncodingException e) {
+ e.printStackTrace();
+ }
+ }
+ } finally {
+ ci.getLock().writeLock().unlock();
+ }
+ }
+ }
+
+ /**
+  * Writes the DBpedia Spotlight results into the metadata of the content
+  * item. Every annotation yields a TextAnnotation; an EntityAnnotation is
+  * created the first time an entity URI is encountered and is linked to each
+  * further TextAnnotation that refers to the same URI.
+  *
+  * @param occs
+  *            the annotations to convert into enhancements
+  * @param ci
+  *            the content item whose metadata receives the enhancements
+  */
+ public void createEnhancements(Collection<DBPSLAnnotation> occs,
+         ContentItem ci) {
+     LiteralFactory literalFactory = LiteralFactory.getInstance();
+     // used for plain literals representing parts of the content
+     String langString = getMetadataLanguage(ci.getMetadata(), null);
+     final Language language = (langString == null || langString.isEmpty())
+             ? null
+             : new Language(langString);
+
+     // entity URI -> EntityAnnotation that was already created for it
+     HashMap<Resource, UriRef> knownEntities = new HashMap<Resource, UriRef>();
+
+     for (DBPSLAnnotation occ : occs) {
+         UriRef textAnnotation = EnhancementEngineHelper
+                 .createTextEnhancement(ci, this);
+         MGraph model = ci.getMetadata();
+         model.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,
+                 new PlainLiteralImpl(occ.surfaceForm, language)));
+         model.add(new TripleImpl(textAnnotation, ENHANCER_START,
+                 literalFactory.createTypedLiteral(occ.offset)));
+         model.add(new TripleImpl(textAnnotation, ENHANCER_END,
+                 literalFactory.createTypedLiteral(occ.offset
+                         + occ.surfaceForm.length())));
+         // TODO ################## model.add(new TripleImpl(textAnnotation,
+         // ENHANCER_SELECTION_CONTEXT, new
+         // PlainLiteralImpl(occ.context,language)));
+
+         // The EntityAnnotation exists already: only reference the new
+         // TextAnnotation from it.
+         if (knownEntities.containsKey(occ.uri)) {
+             model.add(new TripleImpl(knownEntities.get(occ.uri),
+                     DC_RELATION, textAnnotation));
+             continue;
+         }
+
+         // First occurrence of this entity: create its EntityAnnotation.
+         UriRef entityAnnotation = EnhancementEngineHelper
+                 .createEntityEnhancement(ci, this);
+         knownEntities.put(occ.uri, entityAnnotation);
+         Literal label = new PlainLiteralImpl(occ.surfaceForm,
+                 new Language("en"));
+         model.add(new TripleImpl(entityAnnotation, DC_RELATION,
+                 textAnnotation));
+         model.add(new TripleImpl(entityAnnotation,
+                 ENHANCER_ENTITY_LABEL, label));
+
+         HashSet<String> typeNames = occ.getTypeNames();
+         if (typeNames != null) {
+             for (String typeName : typeNames) {
+                 UriRef annotationType = new UriRef(typeName);
+                 model.add(new TripleImpl(entityAnnotation,
+                         ENHANCER_ENTITY_TYPE, annotationType));
+                 model.add(new TripleImpl(textAnnotation, DC_TYPE,
+                         annotationType));
+             }
+         }
+         model.add(new TripleImpl(entityAnnotation,
+                 ENHANCER_ENTITY_REFERENCE, occ.uri));
+     }
+ }
+
+ /**
+ * Sends a POST request to the DBpediaSpotlight endpoint.
+ *
+ * @param text
+ * a <code>String</code> with the text to be analyzed
+ * @return a <code>Collection<DBPSLAnnotation></code> with the server
+ * response
+ * @throws EngineException
+ * if the request cannot be sent
+ */
+ public Collection<DBPSLAnnotation> doPostRequest(String text)
+ throws EngineException {
StringBuilder data = new StringBuilder();
try {
- if ( spotlightSpotter != null && !spotlightSpotter.isEmpty() )
- data.append( URLEncoder.encode( "spotter", "UTF-8" ) + "=" + URLEncoder.encode( spotlightSpotter, "UTF-8" ) + "&" );
- if ( spotlightDisambiguator != null && !spotlightDisambiguator.isEmpty() )
- data.append( URLEncoder.encode( "disambiguator", "UTF-8" ) + "=" + URLEncoder.encode( spotlightDisambiguator, "UTF-8" ) + "&" );
- if ( spotlightTypesRestriction != null && !spotlightTypesRestriction.isEmpty() )
- data.append( URLEncoder.encode( "types", "UTF-8" ) + "=" + URLEncoder.encode( spotlightTypesRestriction, "UTF-8" ) + "&" );
- if ( spotlightSupport != null && !spotlightSupport.isEmpty() )
- data.append( URLEncoder.encode( "support", "UTF-8" ) + "=" + URLEncoder.encode( spotlightSupport, "UTF-8" ) + "&" );
- if ( spotlightConfidence != null && !spotlightConfidence.isEmpty() )
- data.append( URLEncoder.encode( "confidence", "UTF-8" ) + "=" + URLEncoder.encode( spotlightConfidence, "UTF-8" ) + "&" );
- if ( spotlightSparql != null && !spotlightSparql.isEmpty() && spotlightTypesRestriction == null )
- data.append( URLEncoder.encode( "sparql", "UTF-8" ) + "=" + URLEncoder.encode( spotlightSparql, "UTF-8" ) + "&" );
- data.append( URLEncoder.encode( "text", "UTF-8" ) + "=" + URLEncoder.encode( text, "UTF-8" ) );
+ if (spotlightSpotter != null && !spotlightSpotter.isEmpty())
+ data.append(URLEncoder.encode("spotter", "UTF-8") + "="
+ + URLEncoder.encode(spotlightSpotter, "UTF-8") + "&");
+ if (spotlightDisambiguator != null
+ && !spotlightDisambiguator.isEmpty())
+ data.append(URLEncoder.encode("disambiguator", "UTF-8") + "="
+ + URLEncoder.encode(spotlightDisambiguator, "UTF-8")
+ + "&");
+ if (spotlightTypesRestriction != null
+ && !spotlightTypesRestriction.isEmpty())
+ data.append(URLEncoder.encode("types", "UTF-8") + "="
+ + URLEncoder.encode(spotlightTypesRestriction, "UTF-8")
+ + "&");
+ if (spotlightSupport != null && !spotlightSupport.isEmpty())
+ data.append(URLEncoder.encode("support", "UTF-8") + "="
+ + URLEncoder.encode(spotlightSupport, "UTF-8") + "&");
+ if (spotlightConfidence != null && !spotlightConfidence.isEmpty())
+ data.append(URLEncoder.encode("confidence", "UTF-8") + "="
+ + URLEncoder.encode(spotlightConfidence, "UTF-8") + "&");
+ if (spotlightSparql != null && !spotlightSparql.isEmpty()
+ && spotlightTypesRestriction == null)
+ data.append(URLEncoder.encode("sparql", "UTF-8") + "="
+ + URLEncoder.encode(spotlightSparql, "UTF-8") + "&");
+ data.append(URLEncoder.encode("text", "UTF-8") + "="
+ + URLEncoder.encode(text, "UTF-8"));
} catch (UnsupportedEncodingException e) {
- throw new EngineException( "Data for the httprequest could not be converted. Error: " + e.getMessage() );
+ throw new EngineException(
+ "Data for the httprequest could not be converted. Error: "
+ + e.getMessage());
}
- HttpURLConnection connection = null;
- StringBuffer response = new StringBuffer();
-
- try {
- //Create connection
- URL url = new URL( spotlightUrl );
- connection = ( HttpURLConnection )url.openConnection();
- connection.setRequestMethod( "POST" );
- connection.setRequestProperty( "Content-Type", "application/x-www-form-urlencoded" );
- connection.setRequestProperty( "Accept", "text/xml" );
-
- connection.setUseCaches( false );
- connection.setDoInput( true );
- connection.setDoOutput( true );
-
- //Send request
- DataOutputStream wr = new DataOutputStream (
- connection.getOutputStream ());
- wr.writeBytes( data.toString() );
- wr.flush ();
- wr.close ();
+ HttpURLConnection connection = null;
+ StringBuffer response = new StringBuffer();
- //Get Response
+ try {
+ // Create connection
+ URL url = new URL(spotlightUrl);
+ connection = (HttpURLConnection) url.openConnection();
+ connection.setRequestMethod("POST");
+ connection.setRequestProperty("Content-Type",
+ "application/x-www-form-urlencoded");
+ connection.setRequestProperty("Accept", "text/xml");
+
+ connection.setUseCaches(false);
+ connection.setDoInput(true);
+ connection.setDoOutput(true);
+
+ // Send request
+ DataOutputStream wr = new DataOutputStream(
+ connection.getOutputStream());
+ wr.writeBytes(data.toString());
+ wr.flush();
+ wr.close();
+
+ // Get Response
InputStream is = connection.getInputStream();
- BufferedReader rd = new BufferedReader( new InputStreamReader( is ) );
+ BufferedReader rd = new BufferedReader(new InputStreamReader(is));
String line;
- while((line = rd.readLine()) != null) {
- response.append( line );
- response.append( '\r' );
+ while ((line = rd.readLine()) != null) {
+ response.append(line);
+ response.append('\r');
}
rd.close();
@@ -349,84 +402,91 @@ public class DBPSpotlightAnnotateEnhance
} finally {
- if(connection != null) {
- connection.disconnect();
+ if (connection != null) {
+ connection.disconnect();
}
}
-
- // Parse the response
- XMLParser xmlParser = new XMLParser();
+ // Parse the response
+ XMLParser xmlParser = new XMLParser();
try {
- Document xmlDoc = xmlParser.loadXMLFromString( response.toString() );
- NodeList nlist = xmlParser.getElementsByTagName( xmlDoc, "Resource" );
- Collection<DBPSLAnnotation> annos = this.getAnnotations( nlist );
-
- return annos;
- } catch ( Exception e) {
- throw new EngineException( "Response XML could not be parsed. Error: " + e.getMessage() );
- }
- }
-
-
- /**
- * This method creates the Collection of Annotations, which the method <code>createEnhancement</code>
- * adds to the meta data of the content item.
- * @param nList NodeList of all Resources contained in the XML response from DBpedia Spotlight
- * @return a Collection<DBPSLAnnotation> with all annotations
- */
- private Collection<DBPSLAnnotation> getAnnotations( NodeList nList ) {
+ Document xmlDoc = xmlParser.loadXMLFromString(response.toString());
+ NodeList nlist = xmlParser.getElementsByTagName(xmlDoc, "Resource");
+ Collection<DBPSLAnnotation> annos = this.getAnnotations(nlist);
+
+ return annos;
+ } catch (Exception e) {
+ throw new EngineException(
+ "Response XML could not be parsed. Error: "
+ + e.getMessage());
+ }
+ }
+
+ /**
+ * Converts the <code>Resource</code> elements of a DBpedia Spotlight XML
+ * response into {@link DBPSLAnnotation} objects, which
+ * <code>createEnhancements</code> adds to the meta data of the content item.
+ * The annotations are collected in a <code>HashSet</code>, so the result
+ * has no defined order.
+ *
+ * @param nList
+ * NodeList of all Resources contained in the XML response from
+ * DBpedia Spotlight
+ * @return a Collection<DBPSLAnnotation> with all annotations
+ * @throws NumberFormatException
+ * if the <code>support</code>, <code>offset</code>,
+ * <code>similarityScore</code> or
+ * <code>percentageOfSecondRank</code> attribute of a Resource is
+ * not a parsable number
+ */
+ private Collection<DBPSLAnnotation> getAnnotations(NodeList nList) {
Collection<DBPSLAnnotation> dbpslAnnos = new HashSet<DBPSLAnnotation>();
-
+
for (int temp = 0; temp < nList.getLength(); temp++) {
- DBPSLAnnotation dbpslann = new DBPSLAnnotation();
- Element node = (Element) nList.item(temp);
- dbpslann.uri = new UriRef( node.getAttribute( "URI" ) );
- dbpslann.support = (new Integer( node.getAttribute( "support" ) ) ).intValue();
- dbpslann.types = node.getAttribute( "types" );
- dbpslann.surfaceForm = node.getAttribute( "surfaceForm" );
- dbpslann.offset = (new Integer( node.getAttribute( "offset" ) ) ).intValue();
- dbpslann.similarityScore = (new Double( node.getAttribute( "similarityScore" ) ) ).doubleValue();
- dbpslann.percentageOfSecondRank = (new Double( node.getAttribute( "percentageOfSecondRank" ) ) ).doubleValue();
-
- dbpslAnnos.add( dbpslann );
+ // One annotation per Resource element; attribute values are copied
+ // over, the numeric ones after parsing their string form.
+ DBPSLAnnotation dbpslann = new DBPSLAnnotation();
+ Element node = (Element) nList.item(temp);
+ dbpslann.uri = new UriRef(node.getAttribute("URI"));
+ dbpslann.support = (new Integer(node.getAttribute("support")))
+ .intValue();
+ dbpslann.types = node.getAttribute("types");
+ dbpslann.surfaceForm = node.getAttribute("surfaceForm");
+ dbpslann.offset = (new Integer(node.getAttribute("offset")))
+ .intValue();
+ dbpslann.similarityScore = (new Double(
+ node.getAttribute("similarityScore"))).doubleValue();
+ dbpslann.percentageOfSecondRank = (new Double(
+ node.getAttribute("percentageOfSecondRank"))).doubleValue();
+
+ dbpslAnnos.add(dbpslann);
}
-
+
return dbpslAnnos;
}
+ public Map<String, Object> getServiceProperties() {
+ return Collections.unmodifiableMap(Collections.singletonMap(
+ ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder));
+ }
- public Map<String, Object> getServiceProperties() {
- return Collections.unmodifiableMap(Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder));
- }
-
-
- public String getMetadataLanguage(MGraph model, NonLiteral subj) {
- Iterator<Triple> it = model.filter(subj, DC_LANGUAGE, null);
- if (it.hasNext()) {
- Resource langNode = it.next().getObject();
- return getLexicalForm(langNode);
- }
- return null;
- }
-
- public String getLexicalForm(Resource res) {
- if (res == null) {
- return null;
- } else if (res instanceof Literal) {
- return ((Literal) res).getLexicalForm();
- } else {
- return res.toString();
- }
- }
-
-
- /**
- * This method is used by the test class to set the endpoint url
- * @param url String the url of the Spotlight endpoint
- */
- public void setEndpointUrl( String url ) {
- spotlightUrl = url;
- }
+ /**
+  * Looks up the first <code>DC_LANGUAGE</code> triple matching the given
+  * subject and returns the lexical form of its object.
+  *
+  * @param model
+  *            the graph to search
+  * @param subj
+  *            the subject to filter on, passed unchanged to the filter
+  * @return the lexical form of the language value, or <code>null</code> if
+  *         no matching triple exists
+  */
+ public String getMetadataLanguage(MGraph model, NonLiteral subj) {
+     Iterator<Triple> languageTriples = model.filter(subj, DC_LANGUAGE, null);
+     if (!languageTriples.hasNext()) {
+         return null;
+     }
+     // Only the first matching triple is considered.
+     return getLexicalForm(languageTriples.next().getObject());
+ }
+
+ /**
+  * Returns the textual form of a resource: the lexical form for a
+  * {@link Literal}, <code>toString()</code> for any other resource.
+  *
+  * @param res
+  *            the resource, may be <code>null</code>
+  * @return the textual form, or <code>null</code> if <code>res</code> is
+  *         <code>null</code>
+  */
+ public String getLexicalForm(Resource res) {
+     // instanceof is false for null, so null falls through to the check below
+     if (res instanceof Literal) {
+         return ((Literal) res).getLexicalForm();
+     }
+     return res == null ? null : res.toString();
+ }
+
+ /**
+ * Sets the URL of the DBpedia Spotlight endpoint that the POST requests
+ * of this engine are sent to. This method is used by the test class to
+ * set the endpoint url.
+ *
+ * @param url
+ * String the url of the Spotlight endpoint
+ */
+ public void setEndpointUrl(String url) {
+ spotlightUrl = url;
+ }
}
Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/XMLParser.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/XMLParser.java?rev=1375018&r1=1375017&r2=1375018&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/XMLParser.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/XMLParser.java Mon Aug 20 13:48:31 2012
@@ -29,38 +29,37 @@ import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
-
/**
* Parses the XML results given by DBPedia Spotlight.
- *
+ *
* @author <a href="mailto:iavor.jelev@babelmonkeys.com">Iavor Jelev</a>
*/
public class XMLParser {
-
- public NodeList getElementsByTagName( Document doc, String tagName ) {
-
- return doc.getElementsByTagName( tagName );
+
+ public NodeList getElementsByTagName(Document doc, String tagName) {
+
+ return doc.getElementsByTagName(tagName);
}
-
- public Document loadXMLFromString( String xml ) throws SAXException, IOException {
- Document doc = loadXMLFromInputStream( new ByteArrayInputStream( xml.getBytes() ) );
+ public Document loadXMLFromString(String xml) throws SAXException,
+ IOException {
+ Document doc = loadXMLFromInputStream(new ByteArrayInputStream(
+ xml.getBytes()));
doc.getDocumentElement().normalize();
return doc;
}
-
- public Document loadXMLFromInputStream( InputStream is ) throws SAXException, IOException {
+ public Document loadXMLFromInputStream(InputStream is) throws SAXException,
+ IOException {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
- factory.setNamespaceAware( true );
- DocumentBuilder builder = null;
+ factory.setNamespaceAware(true);
+ DocumentBuilder builder = null;
try {
builder = factory.newDocumentBuilder();
+ } catch (ParserConfigurationException ex) {
}
- catch ( ParserConfigurationException ex ) {
- }
Document doc = builder.parse(is);
is.close();
doc.getDocumentElement().normalize();
@@ -68,14 +67,14 @@ public class XMLParser {
return doc;
}
-
- public Document loadXMLFromFile( String filePath ) throws ParserConfigurationException, SAXException, IOException {
- File fXmlFile = new File( filePath );
+ public Document loadXMLFromFile(String filePath)
+ throws ParserConfigurationException, SAXException, IOException {
+ File fXmlFile = new File(filePath);
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
Document doc = dBuilder.parse(fXmlFile);
doc.getDocumentElement().normalize();
-
+
return doc;
}
}
\ No newline at end of file
Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/core/DBPSpotlightAnnotateEnhancementTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/core/DBPSpotlightAnnotateEnhancementTest.java?rev=1375018&r1=1375017&r2=1375018&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/core/DBPSpotlightAnnotateEnhancementTest.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightannotate/src/test/java/org/apache/stanbol/enhancer/engines/dbpspotlightannotate/core/DBPSpotlightAnnotateEnhancementTest.java Mon Aug 20 13:48:31 2012
@@ -29,38 +29,43 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * This class provides a JUnit test for DBpedia Spotlight Annotate EnhancementEngine.
+ * This class provides a JUnit test for DBpedia Spotlight Annotate
+ * EnhancementEngine.
+ *
* @author Iavor Jelev, babelmonkeys / GzEvD
*/
public class DBPSpotlightAnnotateEnhancementTest {
- /**
- * This contains the logger.
- */
- private static final Logger LOG = LoggerFactory.getLogger(DBPSpotlightAnnotateEnhancementTest.class);
- private static String SPL_URL = System.getProperty(DBPSpotlightAnnotateEnhancementEngine.SL_URL_KEY) == null ?
- "http://spotlight.dbpedia.org/rest/annotate" : (String) System.getProperty(DBPSpotlightAnnotateEnhancementEngine.SL_URL_KEY);
- private static String TEST_TEXT = "President Obama is meeting Angela Merkel in Berlin on Monday";
- private static DBPSpotlightAnnotateEnhancementEngine dbpslight;
-
- @BeforeClass
- public static void oneTimeSetup() throws ConfigurationException {
- dbpslight = new DBPSpotlightAnnotateEnhancementEngine();
- dbpslight.setEndpointUrl( SPL_URL );
- }
+ /**
+ * This contains the logger.
+ */
+ private static final Logger LOG = LoggerFactory
+ .getLogger(DBPSpotlightAnnotateEnhancementTest.class);
+ private static String SPL_URL = System
+ .getProperty(DBPSpotlightAnnotateEnhancementEngine.SL_URL_KEY) == null ? "http://spotlight.dbpedia.org/rest/annotate"
+ : (String) System
+ .getProperty(DBPSpotlightAnnotateEnhancementEngine.SL_URL_KEY);
+ private static String TEST_TEXT = "President Obama is meeting Angela Merkel in Berlin on Monday";
+ private static DBPSpotlightAnnotateEnhancementEngine dbpslight;
-
- @Test
- public void testEntityExtraction() {
- Collection<DBPSLAnnotation> entities;
+ @BeforeClass
+ public static void oneTimeSetup() throws ConfigurationException {
+ dbpslight = new DBPSpotlightAnnotateEnhancementEngine();
+ dbpslight.setEndpointUrl(SPL_URL);
+ }
+
+ @Test
+ public void testEntityExtraction() {
+ Collection<DBPSLAnnotation> entities;
try {
- entities = dbpslight.doPostRequest( TEST_TEXT );
- LOG.info("Found entities: {}",entities.size());
- LOG.debug("Entities:\n{}",entities);
- Assert.assertFalse("No entities were found!", entities.isEmpty());
+ entities = dbpslight.doPostRequest(TEST_TEXT);
+ LOG.info("Found entities: {}", entities.size());
+ LOG.debug("Entities:\n{}", entities);
+ Assert.assertFalse("No entities were found!", entities.isEmpty());
} catch (EngineException e) {
- Assert.assertFalse("An EngineException occurred! The message was: " + e.getMessage(), true);
+ Assert.assertFalse("An EngineException occurred! The message was: "
+ + e.getMessage(), true);
}
- }
+ }
}
Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/pom.xml?rev=1375018&r1=1375017&r2=1375018&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/pom.xml (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/pom.xml Mon Aug 20 13:48:31 2012
@@ -1,121 +1,109 @@
<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
- <modelVersion>4.0.0</modelVersion>
+ <modelVersion>4.0.0</modelVersion>
- <parent>
- <artifactId>org.apache.stanbol.enhancer.parent</artifactId>
- <groupId>org.apache.stanbol</groupId>
- <version>0.9.0-incubating</version>
- <relativePath>../../parent</relativePath>
- </parent>
-
- <groupId>org.apache.stanbol</groupId>
- <artifactId>org.apache.stanbol.enhancer.engines.dbpspotlightcandidates</artifactId>
- <packaging>bundle</packaging>
-
- <name>Apache Stanbol Enhancer Enhancement Engine : DBPedia Spotlight Candidates</name>
- <description>an enhancement engine for associating candidate DBpedia URIs to spotted surfaceForms</description>
-
- <inceptionYear>2010</inceptionYear>
-
- <!--scm>
- <connection>
- scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
- </connection>
- <developerConnection>
- scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
- </developerConnection>
- <url>http://incubator.apache.org/stanbol/</url>
- </scm-->
-
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.felix</groupId>
- <artifactId>maven-bundle-plugin</artifactId>
- <extensions>true</extensions>
- <configuration>
- <instructions>
- <Export-Package>
- org.apache.stanbol.enhancer.engines.dbpspotlightcandidates;version=${project.version}
- </Export-Package>
- <Embed-Dependency>
- </Embed-Dependency>
- </instructions>
- </configuration>
- </plugin>
- <plugin>
- <groupId>org.apache.felix</groupId>
- <artifactId>maven-scr-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>org.apache.rat</groupId>
- <artifactId>apache-rat-plugin</artifactId>
- <configuration>
- <excludes>
- <!-- AL20 licensed files: See src/test/resources/README -->
- <exclude>src/test/resources/en.txt</exclude>
- </excludes>
- </configuration>
- </plugin>
- </plugins>
- </build>
-
- <properties>
- <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
- </properties>
-
- <dependencies>
- <dependency>
- <groupId>org.apache.stanbol</groupId>
- <artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
- </dependency>
-
- <dependency>
- <groupId>org.apache.tika</groupId>
- <artifactId>tika-core</artifactId>
- </dependency>
-
- <dependency>
- <groupId>org.apache.felix</groupId>
- <artifactId>org.apache.felix.scr.annotations</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.clerezza</groupId>
- <artifactId>rdf.core</artifactId>
- </dependency>
- <dependency>
- <groupId>commons-io</groupId>
- <artifactId>commons-io</artifactId>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-api</artifactId>
- </dependency>
-
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <scope>test</scope>
- </dependency>
- </dependencies>
+ <parent>
+ <artifactId>org.apache.stanbol.enhancer.parent</artifactId>
+ <groupId>org.apache.stanbol</groupId>
+ <version>0.9.0-incubating</version>
+ <relativePath>../../parent</relativePath>
+ </parent>
+
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.engines.dbpspotlightcandidates</artifactId>
+ <packaging>bundle</packaging>
+
+ <name>Apache Stanbol Enhancer Enhancement Engine : DBPedia Spotlight Candidates</name>
+ <description>an enhancement engine for associating candidate DBpedia URIs to spotted surfaceForms</description>
+
+ <inceptionYear>2010</inceptionYear>
+
+ <!--scm> <connection> scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
+ </connection> <developerConnection> scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/tags/0.9.0-incubating/enhancer/engines/langid/
+ </developerConnection> <url>http://incubator.apache.org/stanbol/</url> </scm -->
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-bundle-plugin</artifactId>
+ <extensions>true</extensions>
+ <configuration>
+ <instructions>
+ <Export-Package>
+ org.apache.stanbol.enhancer.engines.dbpspotlightcandidates;version=${project.version}
+ </Export-Package>
+ <Embed-Dependency>
+ </Embed-Dependency>
+ </instructions>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-scr-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ <configuration>
+ <excludes>
+ <!-- AL20 licensed files: See src/test/resources/README -->
+ <exclude>src/test/resources/en.txt</exclude>
+ </excludes>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <properties>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ </properties>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-core</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>org.apache.felix.scr.annotations</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>rdf.core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
</project>
Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/CandidateResource.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/CandidateResource.java?rev=1375018&r1=1375017&r2=1375018&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/CandidateResource.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/CandidateResource.java Mon Aug 20 13:48:31 2012
@@ -20,7 +20,7 @@ package org.apache.stanbol.enhancer.engi
/**
* Stores the candidate resources given by DBPedia Spotlight Candidates.
- *
+ *
* @author <a href="mailto:iavor.jelev@babelmonkeys.com">Iavor Jelev</a>
*/
public class CandidateResource {
@@ -34,7 +34,10 @@ public class CandidateResource {
public double finalScore;
public String toString() {
- return String.format( "[label=%s, uri=%s, contextualScore=%d, percentageOfSecondRank=%d, contextualScore=%d, " +
- "percentageOfSecondRank=%d, contextualScore=%d]", label, uri, contextualScore, percentageOfSecondRank, support, priorScore, finalScore ) ;
+     // %s is used for every field: %d only accepts integral arguments and
+     // fails with an IllegalFormatConversionException for a double such as
+     // finalScore. Each label names the argument that is actually printed.
+     return String
+             .format("[label=%s, uri=%s, contextualScore=%s, percentageOfSecondRank=%s, support=%s, "
+                     + "priorScore=%s, finalScore=%s]",
+                     label, uri, contextualScore, percentageOfSecondRank,
+                     support, priorScore, finalScore);
}
}
Modified: incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/DBPSLSurfaceForm.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/DBPSLSurfaceForm.java?rev=1375018&r1=1375017&r2=1375018&view=diff
==============================================================================
--- incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/DBPSLSurfaceForm.java (original)
+++ incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpspotlightcandidates/src/main/java/org/apache/stanbol/enhancer/engines/dbpspotlightcandidates/DBPSLSurfaceForm.java Mon Aug 20 13:48:31 2012
@@ -23,7 +23,7 @@ import java.util.List;
/**
* Stores the surface forms given by DBPedia Spotlight Candidates.
- *
+ *
* @author <a href="mailto:iavor.jelev@babelmonkeys.com">Iavor Jelev</a>
*/
public class DBPSLSurfaceForm {
@@ -34,6 +34,7 @@ public class DBPSLSurfaceForm {
public List<CandidateResource> resources = new ArrayList<CandidateResource>();
public String toString() {
- return String.format( "[name=%s, offset=%i, type=%s]", name, offset, type ) ;
+     // "%i" is not a java.util.Formatter conversion and makes String.format
+     // throw an UnknownFormatConversionException; %s formats any field type.
+     return String.format("[name=%s, offset=%s, type=%s]", name, offset,
+             type);
}
}