You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rh...@apache.org on 2013/10/03 17:13:36 UTC

svn commit: r1528907 [1/2] - in /stanbol/branches/disambiguation/enhancement-engines: foaf-disambiguation/ foaf-disambiguation/src/ foaf-disambiguation/src/main/ foaf-disambiguation/src/main/java/ foaf-disambiguation/src/main/java/org/ foaf-disambiguat...

Author: rharo
Date: Thu Oct  3 15:13:35 2013
New Revision: 1528907

URL: http://svn.apache.org/r1528907
Log:
GSoC Projects Source Code Commit:

[STANBOL-1161] - Entity Disambiguation using FOAF Correlation
[STANBOL-1156] - Freebase Entity Disambiguation

Added:
    stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/
    stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/README.md   (with props)
    stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/pom.xml   (with props)
    stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/
    stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/main/
    stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/main/java/
    stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/main/java/org/
    stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/main/java/org/apache/
    stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/main/java/org/apache/stanbol/
    stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/main/java/org/apache/stanbol/enhancer/
    stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/
    stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/
    stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/foaf/
    stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/foaf/EntityAnnotation.java   (with props)
    stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/foaf/FOAFDisambiguationEngine.java   (with props)
    stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/test/
    stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/test/java/
    stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/test/java/org/
    stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/test/java/org/apache/
    stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/test/java/org/apache/stanbol/
    stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/test/java/org/apache/stanbol/AppTest.java   (with props)
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/LICENSE
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/README.md
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/pom.xml
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/main/
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/main/java/
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/main/java/org/
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/main/java/org/apache/
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/main/java/org/apache/stanbol/
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/main/java/org/apache/stanbol/enhancer/
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/freebase/
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/freebase/graph/
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/freebase/graph/CustomDijkstraDistance.java
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/freebase/graph/FreebaseDisambiguatorEngine.java
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/freebase/graph/UndirectedGraphJung.java
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/freebase/graph/constants/
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/freebase/graph/constants/FreebaseDisambiguatorEngineConstants.java
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/freebase/graph/helper/
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/freebase/graph/helper/FreebaseDisambiguatorEngineHelper.java
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/main/resources/
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/main/resources/blueprints-core/
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/main/resources/blueprints-core/pom.xml
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/main/resources/blueprints-neo4j-graph/
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/main/resources/blueprints-neo4j-graph/pom.xml
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/test/
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/test/java/
    stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/src/test/resources/

Added: stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/README.md
URL: http://svn.apache.org/viewvc/stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/README.md?rev=1528907&view=auto
==============================================================================
--- stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/README.md (added)
+++ stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/README.md Thu Oct  3 15:13:35 2013
@@ -0,0 +1,60 @@
+Stanbol Entity Disambiguation using FOAF Correlation
+======================================================
+
+This is the Stanbol enhancement engine developed as part of the GSoC 2013 project [1]. <br/>
+This engine uses FOAF correlation techniques to disambiguate suggested entities in a content.
+The engine's main functionality is to increase the confidence of Entity-Annotations identified from previous engines, by using 2 fundamental techniques; <br/>
+1. Processing correlating URI references in the Entities to detect connected-ness<br/>
+2. Processing foaf:name comparison with fise:selected-text
+
+Disambiguation Process in Detail
+--------------------------------
+In this project, 2 main algorithms are used as given above and finally the disambiguation-confidence calculated from both algorithms are merged as below; <br/>
+
+original-confidence = oc<br/>
+original-confidence-weight = ocw<br/>
+correlation-disambiguation-score = cds<br/>
+correlation-disambiguation-weight = cdw<br/>
+foaf-name-disambiguation-score = fds<br/>
+foaf-name-disambiguation-weight = fdw<br/>
+
+<code>disambiguated-confidence = (oc * ocw) + (cds * cdw) + (fds * fdw)</code>
+
+Here the default weights used are as below;<br/>
+<code>ocw : cdw : fdw = 1 : 2 : 2</code> <br/>
+
+###Correlation based disambiguation
+
+The main objective is to identify correlated URIs between entities and increase the confidence of the most 'connected' entity from the suggested entities. FOAF entities are connected to other similar entities via URI references like foaf:knows, owl:sameAs, rdfs:seeAlso. These URI references can be used as correlation factors to cluster related FOAF entities and use them effectively for disambiguation. <br/>
+Not only FOAF entities: any other types of entities, including dbpedia-ont:Person and dbpedia-ont:Organization, can be connected with each other via URI references. In this project, correlation references between entities are used as a main factor for entity disambiguation. All URI/Reference type fields of the entities are extracted and processed to find correlations with other entities suggested. The most connected entity will have the highest number of URI correlations.<br/>
+
+The correlation based disambiguation algorithm basically follows below steps;<br/>
+1. Process all entities suggested for the content and extract all unique URIReferences as keys and the entities linked to them as values in a Map. <br/>
+2. For each URI reference, increase the correlation score of entity linked to it, relative to the number of entities linked.<br/>
+3. Calculate the correlation-disambiguation-confidence based on the correlation-score and add it to the total disambiguated confidence. <br/>
+
+
+###FOAF Name based disambiguation
+The second technique used is literal matching of foaf:name field of the entity with the fise:selected-texts in the content. Each entity suggested for the content will be checked for the foaf:name property and it will be matched with the list of selected-texts. With an exact match, the disambiguated-confidence will be increased.<br/> 
+
+Finally the cumulative disambiguated-confidence is calculated based on a weighted scale.<br/>
+
+How to execute the engine
+--------------------------
+This engine requires Entity-Annotations extracted from previous engines, and entityhub pre-configured with FOAF entities. 
+The entityhub-site: <code>foaf-site</code> created by indexing the btc2012 dataset including substantial amount of FOAF data can be found at [2]. <br/>
+Please go through the steps in the project's README to configure the 'foaf-site' in Stanbol entityhub and use it in the foaf-site-chain enhancement-chain. The new disambiguation-foaf engine will be used to extend the functionality of this enhancement-chain in this project.<br/>
+
+After configuring the 'foaf-site' with a sufficient FOAF dataset you can install and use the new engine by following below steps; <br/>
+1. Build the maven project using command : <code>mvn clean install</code> <br/>
+2. Start the Stanbol engine and install the bundle: <code>org.apache.stanbol.enhancer.engines.disambiguation.foaf-1.0-SNAPSHOT.jar</code><br/> 
+3. Configure the foaf-site-chain with the new disambiguation engine
+
+The new engine is identified by : <code>disambiguation-foaf</code>
+Please note that in addition to the foaf-site I have also used entitylinking with dbpedia in the foaf-site-chain to increase the amount of entities for disambiguation.
+Therefore after configuring the enhancement-chain successfully the foaf-site-chain should look like below; <br/>
+<pre>
+Engines: langdetect, opennlp-sentence, opennlp-token, opennlp-pos, foaf-site-linking, opennlp-ner, dbpediaLinking, disambiguation-foaf
+</pre>
+
+[1] http://www.google-melange.com/gsoc/proposal/review/google/gsoc2013/dileepaj/1 <br/>
+[2] https://github.com/dileepajayakody/FOAFSite

Propchange: stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/README.md
------------------------------------------------------------------------------
    svn:executable = *

Added: stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/pom.xml?rev=1528907&view=auto
==============================================================================
--- stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/pom.xml (added)
+++ stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/pom.xml Thu Oct  3 15:13:35 2013
@@ -0,0 +1,114 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+
+	<parent>
+		<groupId>org.apache.stanbol</groupId>
+		<artifactId>apache-stanbol-enhancement-engines</artifactId>
+		<version>0.10.1-SNAPSHOT</version>
+		<!--change the relative path to point to enhancement-engine root pom-->
+		<relativePath>../../apache/stanbol/trunk/enhancement-engines/</relativePath>
+	</parent>
+
+	<groupId>org.apache.stanbol</groupId>
+	<artifactId>org.apache.stanbol.enhancer.engines.disambiguation.foaf</artifactId>
+	<version>1.0-SNAPSHOT</version>
+	<packaging>bundle</packaging>
+
+	<name>foaf-disambiguation</name>
+	<url>http://maven.apache.org</url>
+
+	<properties>
+		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+	</properties>
+
+	<build>
+		<plugins>
+			<plugin>
+				<groupId>org.apache.felix</groupId>
+				<artifactId>maven-bundle-plugin</artifactId>
+				<extensions>true</extensions>
+				<configuration>
+					<instructions>
+						<Bundle-DocURL>http://stanbol.apache.org</Bundle-DocURL>
+						<Bundle-Vendor>gsoc-dileepa</Bundle-Vendor>
+						<Bundle-SymbolicName>${project.artifactId}</Bundle-SymbolicName>
+						<Bundle-Version>${project.version}</Bundle-Version>
+						<Export-Package>
+							org.apache.stanbol.enhancer.engine.disambiguation.foaf.*;version=${project.version}
+						</Export-Package>
+						<Import-Package>
+							org.apache.stanbol.enhancer.servicesapi; provide:=true; version="[0.10,0.12)",
+							*;resolution:="optional"
+            			</Import-Package>
+					</instructions>
+				</configuration>
+			</plugin>
+			<plugin>
+				<groupId>org.apache.felix</groupId>
+				<artifactId>maven-scr-plugin</artifactId>
+			</plugin>
+		</plugins>
+	</build>
+
+	<dependencies>
+		<!--dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> 
+			<version>3.8.1</version> <scope>test</scope> </dependency -->
+
+		<dependency>
+			<groupId>org.apache.stanbol</groupId>
+			<artifactId>org.apache.stanbol.commons.namespaceprefix.service</artifactId>
+			<version>0.11.0</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.stanbol</groupId>
+			<artifactId>org.apache.stanbol.enhancer.engines.entitylinking.engine</artifactId>
+			<version>0.10.1-SNAPSHOT</version>
+		</dependency>
+		<dependency>
+			<groupId>org.slf4j</groupId>
+			<artifactId>slf4j-api</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.stanbol</groupId>
+			<artifactId>org.apache.stanbol.commons.stanboltools.offline</artifactId>
+			<version>0.11.0</version>
+		</dependency>
+
+		<dependency>
+			<groupId>org.apache.stanbol</groupId>
+			<artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+			<version>0.10.0</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.stanbol</groupId>
+			<artifactId>org.apache.stanbol.enhancer.nlp</artifactId>
+			<version>0.10.0</version>
+		</dependency>
+
+		<dependency>
+			<groupId>org.apache.stanbol</groupId>
+			<artifactId>org.apache.stanbol.entityhub.servicesapi</artifactId>
+			<version>0.11.0</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.felix</groupId>
+			<artifactId>org.apache.felix.scr.annotations</artifactId>
+		</dependency>
+		<dependency>
+			<groupId>commons-lang</groupId>
+			<artifactId>commons-lang</artifactId>
+		</dependency>
+		<!-- Test dependencies -->
+		<dependency>
+			<groupId>junit</groupId>
+			<artifactId>junit</artifactId>
+			<scope>test</scope>
+		</dependency>
+		<dependency>  <!-- used for debug level logging during tests -->
+			<groupId>org.slf4j</groupId>
+			<artifactId>slf4j-log4j12</artifactId>
+			<scope>test</scope>
+		</dependency>
+	</dependencies>
+</project>

Propchange: stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/pom.xml
------------------------------------------------------------------------------
    svn:executable = *

Added: stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/foaf/EntityAnnotation.java
URL: http://svn.apache.org/viewvc/stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/foaf/EntityAnnotation.java?rev=1528907&view=auto
==============================================================================
--- stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/foaf/EntityAnnotation.java (added)
+++ stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/foaf/EntityAnnotation.java Thu Oct  3 15:13:35 2013
@@ -0,0 +1,365 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engine.disambiguation.foaf;
+
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+
+import java.util.SortedMap;
+import java.util.SortedSet;
+
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.TripleCollection;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.apache.stanbol.entityhub.servicesapi.model.Entity;
+import org.apache.stanbol.entityhub.servicesapi.model.Text;
+import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
+import org.apache.stanbol.entityhub.servicesapi.site.Site;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * An abstraction of an EntityAnnotation
+ * 
+ * @author Dileepa Jayakody
+ */
+public class EntityAnnotation implements Comparable<EntityAnnotation> {
+
+	private static final Logger log = LoggerFactory
+			.getLogger(EntityAnnotation.class);
+
+	/**
+	 * foaf:name disambiguation ratio (2.0)
+	 */
+	public static final double FOAFNAME_DISAMBIGUATION_RATIO = 2.0;
+	/**
+	 * URI Reference correlation disambiguation ratio (2.0)
+	 */
+	public static final double URI_CORRELATION_DISAMBIGUATION_RATIO = 2.0;
+	/**
+	 * Default ratio for the original fise:confidence of suggested entities
+	 */
+	public static final double ORIGINAL_CONFIDNECE_RATIO = 1.0;
+
+	/**
+	 * The weight for foaf:name disambiguation scores
+	 */
+	private double foafNameDisambiguationWeight = FOAFNAME_DISAMBIGUATION_RATIO
+			/ (FOAFNAME_DISAMBIGUATION_RATIO + ORIGINAL_CONFIDNECE_RATIO + URI_CORRELATION_DISAMBIGUATION_RATIO);
+	/**
+	 * The weight for uri-correlation disambiguation scores
+	 */
+	private double uriCorrelationDisambiguationWeight = URI_CORRELATION_DISAMBIGUATION_RATIO
+			/ (FOAFNAME_DISAMBIGUATION_RATIO + ORIGINAL_CONFIDNECE_RATIO + URI_CORRELATION_DISAMBIGUATION_RATIO);
+	/**
+	 * The weight for the original confidence scores
+	 * 
+	 */
+	private double confidenceWeight = ORIGINAL_CONFIDNECE_RATIO
+			/ (FOAFNAME_DISAMBIGUATION_RATIO + ORIGINAL_CONFIDNECE_RATIO + URI_CORRELATION_DISAMBIGUATION_RATIO);
+
+	private static final LiteralFactory lf = LiteralFactory.getInstance();
+
+	private static final UriRef ENTITYHUB_SITE = new UriRef(
+			RdfResourceEnum.site.getUri());
+
+	private UriRef uriLink;
+	private UriRef entityUri;
+	private Entity entity;
+
+	private Double originalConfidence = 0.0;
+	// private Double entityReferenceDisambiguationScore = 0.0;
+	private Double foafNameDisambiguationScore = 0.0;
+	private Double disambiguatedConfidence = 0.0;
+	private Double entityReferenceDisambiguatedConfidence = 0.0;
+	private Double foafNameDisambiguatedConfidence = 0.0;
+	// the score assigned based on the number of uri correlations with other
+	// entities
+	private int correlationScore;
+	// uri-references from this entity
+	private int referencesFromEntity;
+	private String site;
+	private String entityType;
+	private String entityLabel;
+
+	private EntityAnnotation(UriRef entityAnnotation) {
+		this.uriLink = entityAnnotation;
+	}
+
+	public EntityAnnotation(Entity entity) {
+		this.entity = entity;
+		this.entityUri = new UriRef(entity.getId());
+		this.site = entity.getSite();
+	}
+
+	/**
+	 * Builds an {@link EntityAnnotation} from an existing fise:EntityAnnotation
+	 * contained in the metadata of the processed {@link ContentItem}.
+	 * <p>
+	 * A missing confidence value is replaced by <code>0.0</code>; site, type
+	 * and label are copied as found in the graph.
+	 * 
+	 * @param graph
+	 *            the graph to read the annotation properties from
+	 * @param uri
+	 *            the resource describing the fise:EntityAnnotation
+	 * @return the EntityAnnotation, or <code>null</code> if the resource has
+	 *         no value for {@link Properties#ENHANCER_ENTITY_REFERENCE}
+	 */
+	public static EntityAnnotation createFromUri(TripleCollection graph,
+			UriRef uri) {
+		UriRef referencedEntity = EnhancementEngineHelper.getReference(graph,
+				uri, Properties.ENHANCER_ENTITY_REFERENCE);
+		if (referencedEntity == null) {
+			// without an entity reference this is most likely not a
+			// fise:EntityAnnotation
+			log.debug("Unable to create Suggestion for EntityAnnotation {} "
+					+ "because property {} is not present", uri,
+					Properties.ENHANCER_ENTITY_REFERENCE);
+			return null;
+		}
+
+		Double confidence = EnhancementEngineHelper.get(graph, uri,
+				Properties.ENHANCER_CONFIDENCE, Double.class, lf);
+		if (confidence == null) {
+			log.warn("EntityAnnotation {} does not define a value for "
+					+ "property {}. Will use '0' as fallback", uri,
+					Properties.ENHANCER_CONFIDENCE);
+			confidence = 0.0;
+		}
+
+		EntityAnnotation result = new EntityAnnotation(uri);
+		result.entityUri = referencedEntity;
+		result.originalConfidence = confidence;
+		result.site = EnhancementEngineHelper.getString(graph, uri,
+				ENTITYHUB_SITE);
+		result.entityType = EnhancementEngineHelper.getString(graph, uri,
+				Properties.ENHANCER_ENTITY_TYPE);
+		result.entityLabel = EnhancementEngineHelper.getString(graph, uri,
+				Properties.ENHANCER_ENTITY_LABEL);
+		return result;
+	}
+
+	public void calculateDisambiguatedConfidence() {
+		this.disambiguatedConfidence = (originalConfidence * confidenceWeight)
+				+ this.foafNameDisambiguatedConfidence
+				+ this.entityReferenceDisambiguatedConfidence;
+	}
+
+	public void calculateFoafNameDisambiguatedConfidence() {
+		this.foafNameDisambiguatedConfidence = (foafNameDisambiguationScore * foafNameDisambiguationWeight);
+	}
+
+	/**
+	 * Calculates the disambiguation confidence obtained from the entity's
+	 * URIReference correlations. The correlation score is first normalized to
+	 * [0..1] relative to the given bounds and then multiplied by the
+	 * uri-correlation weight.
+	 * <p>
+	 * If <code>max</code> is not greater than <code>min</code> no
+	 * normalization is possible and the stored confidence is left unchanged.
+	 * 
+	 * @param max
+	 *            maximum correlation score over all entities
+	 * @param min
+	 *            minimum correlation score over all entities
+	 */
+	public void calculateEntityReferenceDisambiguatedConfidence(int max, int min) {
+		int range = max - min;
+		if (range > 0) {
+			// Divide in floating point: int / int would truncate the
+			// normalized score to 0 for every entity below the maximum.
+			double normalizedCorrelationScore = (double) (correlationScore - min)
+					/ range;
+			this.entityReferenceDisambiguatedConfidence = (normalizedCorrelationScore * uriCorrelationDisambiguationWeight);
+		}
+	}
+
+	/**
+	 * The URI of the fise:EntityAnnotation representing this suggestion in the
+	 * {@link ContentItem#getMetadata() metadata} of the processed
+	 * {@link ContentItem}. This is <code>null</code> for instances created
+	 * via {@link #EntityAnnotation(Entity)} until
+	 * {@link #setEntityAnnotation(UriRef)} is called.
+	 * 
+	 * @return the URI of the fise:EntityAnnotation or <code>null</code> if not
+	 *         present.
+	 */
+	public UriRef getUriLink() {
+		return uriLink;
+	}
+
+	/**
+	 * Allows to set the URI of the fise:EntityAnnotation. This is required if
+	 * the original enhancement structure shared one fise:EntityAnnotation
+	 * instance for two fise:TextAnnotations (e.g. because both TextAnnotations
+	 * had the exact same value for fise:selected-text). After disambiguation it
+	 * is necessary to 'clone' fise:EntityAnnotations like that to give them
+	 * different fise:confidence values. Because of that it is supported to set
+	 * the new URI of the cloned fise:EntityAnnotation.
+	 * 
+	 * @param uri
+	 *            the uri of the cloned fise:EntityAnnotation
+	 */
+	public void setEntityAnnotation(UriRef uri) {
+		this.uriLink = uri;
+	}
+
+	/**
+	 * The URI of the Entity (MUST NOT be <code>null</code>)
+	 * 
+	 * @return the URI
+	 */
+	public UriRef getEntityUri() {
+		return entityUri;
+	}
+
+	/**
+	 * The original confidence of the fise:EntityAnnotation. The field starts
+	 * at <code>0.0</code>, and
+	 * {@link #createFromUri(TripleCollection, UriRef)} also falls back to
+	 * <code>0.0</code> when the annotation defines no confidence.
+	 * 
+	 * @return the original confidence
+	 */
+	public Double getOriginalConfidnece() {
+		return originalConfidence;
+	}
+
+	/**
+	 * The {@link Entity} or <code>null</code> if not available. For Suggestions
+	 * that are created based on fise:EntityAnnotations the Entity is not
+	 * available. Entities might be loaded as part of the Disambiguation
+	 * process.
+	 * 
+	 * @return the {@link Entity} or <code>null</code> if not available
+	 */
+	public Entity getEntity() {
+		return entity;
+	}
+
+	/**
+	 * The confidence after disambiguation. The field is initialized to
+	 * <code>0.0</code> and is updated by
+	 * {@link #calculateDisambiguatedConfidence()} or
+	 * {@link #setDisambiguatedConfidence(Double)}.
+	 * 
+	 * @return the disambiguated confidence; <code>null</code> only if it was
+	 *         explicitly set to <code>null</code> via the setter
+	 */
+	public Double getDisambiguatedConfidence() {
+		return disambiguatedConfidence;
+	}
+
+	/**
+	 * The name of the Entityhub {@link Site} the suggested Entity is managed.
+	 * 
+	 * @return the name of the Entityhub {@link Site}
+	 */
+	public String getSite() {
+		return site;
+	}
+
+	public void setEntityType(String entityType) {
+		this.entityType = entityType;
+	}
+
+	public String getEntityType() {
+		return entityType;
+	}
+
+	public void setEntityLabel(String entityLabel) {
+		this.entityLabel = entityLabel;
+	}
+
+	public String getEntityLabel() {
+		return entityLabel;
+	}
+
+	public void setCorrelationScore(int correlationScore) {
+		this.correlationScore = correlationScore;
+	}
+
+	/**
+	 * Setter for the confidence after disambiguation
+	 * 
+	 * @param disambiguatedConfidence
+	 */
+	public void setDisambiguatedConfidence(Double disambiguatedConfidence) {
+		this.disambiguatedConfidence = disambiguatedConfidence;
+	}
+
+	public void increaseCorrelationScore(int corefEntities) {
+		this.correlationScore += corefEntities;
+	}
+
+	public int getCorrelationScore() {
+		return correlationScore;
+	}
+
+	public void setReferencesFromEntity(int linksFromEntity) {
+		this.referencesFromEntity = linksFromEntity;
+	}
+
+	public int getReferencesFromEntity() {
+		return referencesFromEntity;
+	}
+
+	public void setFoafNameDisambiguationScore(
+			Double foafNameDisambiguationScore) {
+		this.foafNameDisambiguationScore = foafNameDisambiguationScore;
+	}
+
+	public Double getFoafNameDisambiguationScore() {
+		return foafNameDisambiguationScore;
+	}
+
+	public void setEntityReferenceDisambiguatedConfidence(
+			Double entityReferenceDisambiguatedConfidence) {
+		this.entityReferenceDisambiguatedConfidence = entityReferenceDisambiguatedConfidence;
+	}
+
+	public Double getEntityReferenceDisambiguatedConfidence() {
+		return entityReferenceDisambiguatedConfidence;
+	}
+
+	public void setFoafNameDisambiguatedConfidence(
+			Double foafNameDisambiguatedConfidence) {
+		this.foafNameDisambiguatedConfidence = foafNameDisambiguatedConfidence;
+	}
+
+	public Double getFoafNameDisambiguatedConfidence() {
+		return foafNameDisambiguatedConfidence;
+	}
+
+	@Override
+	public int hashCode() {
+		return entityUri.hashCode();
+	}
+
+	@Override
+	public boolean equals(Object obj) {
+		return obj instanceof EntityAnnotation
+				&& ((EntityAnnotation) obj).entityUri.equals(entityUri);
+	}
+
+	/**
+	 * Compares based on the {@link #getDisambiguatedConfidence()} (if present)
+	 * and falls back to the {@link #getOriginalConfidnece()}. If the original
+	 * confidence value is not present or both Suggestions do have the same
+	 * confidence the natural order of the Entities URI is used. This also
+	 * ensures <code>(x.compareTo(y)==0) == (x.equals(y))</code> and allows to
+	 * use this class with {@link SortedMap} and {@link SortedSet}
+	 * implementations.
+	 * <p>
+	 */
+	@Override
+	public int compareTo(EntityAnnotation other) {
+		int result;
+		if (disambiguatedConfidence != null
+				&& other.disambiguatedConfidence != null) {
+			result = other.disambiguatedConfidence
+					.compareTo(disambiguatedConfidence);
+		} else if (other.originalConfidence != null
+				&& originalConfidence != null) {
+			result = other.originalConfidence.compareTo(originalConfidence);
+		} else {
+			result = 0;
+		}
+		// ensure (x.compareTo(y)==0) == (x.equals(y))
+		return result == 0 ? entityUri.getUnicodeString().compareTo(
+				other.entityUri.getUnicodeString()) : result;
+	}
+
+}

Propchange: stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/foaf/EntityAnnotation.java
------------------------------------------------------------------------------
    svn:executable = *

Added: stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/foaf/FOAFDisambiguationEngine.java
URL: http://svn.apache.org/viewvc/stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/foaf/FOAFDisambiguationEngine.java?rev=1528907&view=auto
==============================================================================
--- stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/foaf/FOAFDisambiguationEngine.java (added)
+++ stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/foaf/FOAFDisambiguationEngine.java Thu Oct  3 15:13:35 2013
@@ -0,0 +1,381 @@
+package org.apache.stanbol.enhancer.engine.disambiguation.foaf;
+
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.*;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Dictionary;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.apache.clerezza.rdf.core.Literal;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.felix.scr.annotations.Activate;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Deactivate;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Reference;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService;
+import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
+import org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
+import org.apache.stanbol.entityhub.servicesapi.model.Entity;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.Text;
+import org.apache.stanbol.entityhub.servicesapi.site.SiteException;
+import org.apache.stanbol.entityhub.servicesapi.site.SiteManager;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * The FOAF Disambiguation Engine analyses the connected-ness of the entities
+ * suggested in a content item by identifying correlated URI references of the
+ * entities. The fise:confidence of the entities are increased with the number
+ * of matches of references with other entities.
+ * 
+ * 
+ * @author Dileepa Jayakody
+ * 
+ */
+@Component(immediate = true, metatype = true)
+@Service
+@Properties(value = { @Property(name = EnhancementEngine.PROPERTY_NAME, value = "disambiguation-foaf") })
+public class FOAFDisambiguationEngine extends
+		AbstractEnhancementEngine<IOException, RuntimeException> implements
+		EnhancementEngine, ServiceProperties {
+
+	private static Logger log = LoggerFactory
+			.getLogger(FOAFDisambiguationEngine.class);
+
+	/**
+	 * The default value for the execution of this Engine. Currently set to
+	 * {@link ServiceProperties#ORDERING_POST_PROCESSING} - 90.
+	 * <p>
+	 * This should ensure that this engine runs as one of the first engines of
+	 * the post-processing phase.
+	 */
+	public static final Integer defaultOrder = ServiceProperties.ORDERING_POST_PROCESSING - 90;
+
+	/**
+	 * The {@link LiteralFactory} used to create typed RDF literals
+	 */
+	private final LiteralFactory literalFactory = LiteralFactory.getInstance();
+
+	@Reference
+	protected SiteManager siteManager;
+
+	@Reference
+	protected NamespacePrefixService namespacePrefixService;
+
+	// all the URI references found in the suggested entities, mapped to the
+	// entities that link to them
+	// key: URIReference value: Set<UriRef> (URIs of the referencing entities)
+	private Map<String, Set<UriRef>> urisReferencedByEntities = new HashMap<String, Set<UriRef>>();
+	// all entity annotations suggested for the content
+	private Map<UriRef, EntityAnnotation> allEnitityAnnotations = new HashMap<UriRef, EntityAnnotation>();
+	//correlation scores extracted from URIReference correlations of the suggested entities
+	private SortedSet<Integer> correlationScoresOfEntities = new TreeSet<Integer>();
+	private String FOAF_NAMESPACE;
+
+	@Override
+	public Map<String, Object> getServiceProperties() {
+		return Collections.unmodifiableMap(Collections.singletonMap(
+				ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder));
+	}
+
+	@Override
+	public int canEnhance(ContentItem ci) throws EngineException {
+		// check if content is present
+		try {
+			if ((ContentItemHelper.getText(ci.getBlob()) == null)
+					|| (ContentItemHelper.getText(ci.getBlob()).trim()
+							.isEmpty())) {
+				return CANNOT_ENHANCE;
+			}
+		} catch (IOException e) {
+			log.error("Failed to get the text for "
+					+ "enhancement of content: " + ci.getUri(), e);
+			throw new InvalidContentException(this, ci, e);
+		}
+		// default enhancement is synchronous enhancement
+		return ENHANCE_SYNCHRONOUS;
+	}
+
+	/**
+	 * Collects all entity annotations suggested for the text annotations of
+	 * the content item, scores them by foaf:name match and by URI
+	 * correlation with the other suggested entities, and writes the resulting
+	 * confidence back to the metadata graph.
+	 * <p>
+	 * NOTE(review): the intermediate results are kept in instance fields, so
+	 * concurrent invocations on the same engine instance would interfere
+	 * with each other - confirm that the engine is only called serially.
+	 * 
+	 * @param ci
+	 *            the content item to process
+	 * @throws EngineException
+	 *             declared by the {@link EnhancementEngine} interface
+	 */
+	@Override
+	public void computeEnhancements(ContentItem ci) throws EngineException {
+		MGraph graph = ci.getMetadata();
+		FOAF_NAMESPACE = namespacePrefixService.getNamespace("foaf");
+		try {
+			Iterator<Triple> it = graph.filter(null, RDF_TYPE,
+					TechnicalClasses.ENHANCER_TEXTANNOTATION);
+			while (it.hasNext()) {
+				UriRef textAnnotation = (UriRef) it.next().getSubject();
+				// Snapshot the selected texts used for the foaf:name
+				// comparison. An Iterator can only be consumed once, so every
+				// suggestion needs its own iterator over the same triples.
+				List<Triple> selectedTexts = new ArrayList<Triple>();
+				Iterator<Triple> selectedTextsItr = graph.filter(
+						textAnnotation, ENHANCER_SELECTED_TEXT, null);
+				while (selectedTextsItr.hasNext()) {
+					selectedTexts.add(selectedTextsItr.next());
+				}
+				// NOTE: this iterator will also include dc:relation between
+				// fise:TextAnnotation's
+				Iterator<Triple> relatedLinks = graph.filter(null,
+						DC_RELATION, textAnnotation);
+				while (relatedLinks.hasNext()) {
+					UriRef link = (UriRef) relatedLinks.next().getSubject();
+					EntityAnnotation suggestion = EntityAnnotation
+							.createFromUri(graph, link);
+					// only entity-annotations take part in the
+					// disambiguation process
+					if (suggestion == null) {
+						continue;
+					}
+					try {
+						// process co-referenced entity-references
+						processEntityReferences(suggestion);
+						// matching with foaf:name
+						processFOAFNameDisambiguation(suggestion,
+								selectedTexts.iterator());
+						// adding new entity annotation to the global map
+						allEnitityAnnotations.put(suggestion.getEntityUri(),
+								suggestion);
+					} catch (SiteException e) {
+						// log with the cause instead of printing to stderr
+						log.error(
+								"Error occured while processing entity-annotation "
+										+ link.getUnicodeString(), e);
+					}
+				}
+			}
+			// calculate correlation scores for entities and disambiguate
+			caculateURICorrelationScoreForEntities();
+			disambiguateEntityReferences();
+			// writing back to graph
+			ci.getLock().writeLock().lock();
+			try {
+				applyDisambiguationResults(graph);
+			} finally {
+				ci.getLock().writeLock().unlock();
+			}
+		} finally {
+			// never leave data of this content item behind, even on failure
+			clearEhancementData();
+		}
+	}
+
+	/**
+	 * Resets all state collected while processing a content item so that it
+	 * does not influence the next one.
+	 */
+	public void clearEhancementData() {
+		urisReferencedByEntities.clear();
+		allEnitityAnnotations.clear();
+		// the min/max taken from this set are used for normalisation and
+		// must not carry over from a previous content item
+		correlationScoresOfEntities.clear();
+	}
+
+	/**
+	 * Dereferences the entity of the parsed annotation from the Entityhub
+	 * site the annotation refers to.
+	 * 
+	 * @param sug
+	 *            the entity annotation to dereference
+	 * @return the entity, or <code>null</code> if the annotation has no site
+	 *         or entity URI, or if the site manager does not return a site
+	 *         for the site id
+	 * @throws SiteException
+	 *             if the lookup on the site fails
+	 */
+	public Entity getEntityFromEntityHub(EntityAnnotation sug)
+			throws SiteException {
+		UriRef entityUri = sug.getEntityUri();
+		String entityhubSite = sug.getSite();
+		if (entityhubSite == null || entityUri == null) {
+			return null;
+		}
+		org.apache.stanbol.entityhub.servicesapi.site.Site site = siteManager
+				.getSite(entityhubSite);
+		if (site == null) {
+			// the site may be unregistered or the id may be outdated
+			log.warn("Unable to dereference {}: unknown Entityhub site '{}'",
+					entityUri, entityhubSite);
+			return null;
+		}
+		// dereferencing the entity from the entityhub
+		return site.getEntity(entityUri.getUnicodeString());
+	}
+
+	/**
+	 * <p>
+	 * Compares the foaf:name of the entity with the selected texts from the
+	 * content. If one of them matches, the foaf-name disambiguation score of
+	 * the EntityAnnotation is set to 1, otherwise to 0. Whitespace and
+	 * non-word characters are removed and case is ignored for the comparison.
+	 * </p>
+	 * <p>
+	 * Nothing is set if the entity can not be dereferenced, the foaf
+	 * namespace is unknown, or the entity has no foaf:name value of type
+	 * {@link Text} or {@link String}.
+	 * </p>
+	 * 
+	 * @param ea
+	 *            the EntityAnnotation to score
+	 * @param selectedTextsTriples
+	 *            the fise:selected-text triples of the content; the iterator
+	 *            is consumed by this method
+	 * @throws SiteException
+	 *             if dereferencing the entity fails
+	 */
+	public void processFOAFNameDisambiguation(EntityAnnotation ea,
+			Iterator<Triple> selectedTextsTriples) throws SiteException {
+		Entity entity = this.getEntityFromEntityHub(ea);
+		if (entity == null || this.FOAF_NAMESPACE == null) {
+			return;
+		}
+		Object foafNameValue = entity.getRepresentation().getFirst(
+				this.FOAF_NAMESPACE + "name");
+		String foafName;
+		if (foafNameValue instanceof Text) {
+			foafName = ((Text) foafNameValue).getText();
+		} else if (foafNameValue instanceof String) {
+			foafName = (String) foafNameValue;
+		} else {
+			// no foaf:name, or a value type that can not be compared
+			return;
+		}
+		// when comparing selected text with foaf:name, all whitespaces and
+		// non-word chars are removed
+		String regexPattern = "[\\s\\W]";
+		double foafNameScore = 0.0;
+		if (foafName != null) {
+			// normalise the name once instead of once per selected text
+			String foafNameStr = foafName.replaceAll(regexPattern, "");
+			while (selectedTextsTriples.hasNext()) {
+				String selectedText = ((Literal) selectedTextsTriples.next()
+						.getObject()).getLexicalForm();
+				String selectedTextStr = selectedText.replaceAll(
+						regexPattern, "");
+				// if the selected-text matches the foaf-name then increase
+				// the score by 1
+				if (selectedTextStr.equalsIgnoreCase(foafNameStr)) {
+					foafNameScore++;
+					break;
+				}
+			}
+		}
+		ea.setFoafNameDisambiguationScore(foafNameScore);
+	}
+
+	/**
+	 * <p>
+	 * Processes all the URIReference type fields of the entity and adds them
+	 * to the global map, with the referenced URI as key and the URIs of the
+	 * referencing entities as value. The number of references found is
+	 * stored on the annotation; it is 0 if the entity can not be
+	 * dereferenced.
+	 * </p>
+	 * 
+	 * @param entityAnnotation
+	 *            the EntityAnnotation to process
+	 * @throws SiteException
+	 *             if dereferencing the entity fails
+	 */
+	public void processEntityReferences(EntityAnnotation entityAnnotation)
+			throws SiteException {
+		Entity entity = this.getEntityFromEntityHub(entityAnnotation);
+		int linksFromEntity = 0;
+		if (entity != null) {
+			Representation entityRep = entity.getRepresentation();
+			Iterator<String> fields = entityRep.getFieldNames();
+			while (fields.hasNext()) {
+				Iterator<org.apache.stanbol.entityhub.servicesapi.model.Reference> urisReferenced = entityRep
+						.getReferences(fields.next());
+				while (urisReferenced.hasNext()) {
+					String referenceString = urisReferenced.next()
+							.getReference();
+					linksFromEntity++;
+					// key:link, value:set of entities referencing the link
+					Set<UriRef> eas = urisReferencedByEntities
+							.get(referenceString);
+					if (eas == null) {
+						eas = new HashSet<UriRef>();
+						urisReferencedByEntities.put(referenceString, eas);
+					}
+					eas.add(entityAnnotation.getEntityUri());
+				}
+			}
+		}
+		entityAnnotation.setReferencesFromEntity(linksFromEntity);
+	}
+
+	/**
+	 * <p>
+	 * Counts the number of correlated URI-References and add that score to
+	 * correlated entities
+	 * </p>
+	 */
+	public void caculateURICorrelationScoreForEntities() {
+		for (String uriReference : urisReferencedByEntities.keySet()) {
+			Set<UriRef> entityAnnotationsLinked = urisReferencedByEntities
+					.get(uriReference);
+			int correlationScoreForURI = entityAnnotationsLinked.size();
+			// adding the correlationscore to the global set for normalization
+			// requirements
+			this.correlationScoresOfEntities.add(new Integer(
+					correlationScoreForURI));
+			for (UriRef ea : entityAnnotationsLinked) {
+				if (allEnitityAnnotations.get(ea) != null) {
+					allEnitityAnnotations.get(ea).increaseCorrelationScore(
+							correlationScoreForURI);
+				}
+			}
+		}
+	}
+
+	/**
+	 * Runs the entity-reference disambiguation for every collected entity
+	 * annotation, passing along the number of distinct referenced URIs.
+	 */
+	public void disambiguateEntityReferences() {
+		final int distinctUriRefs = urisReferencedByEntities.size();
+		Iterator<EntityAnnotation> annotations = allEnitityAnnotations
+				.values().iterator();
+		while (annotations.hasNext()) {
+			performEntityReferenceDisambiguation(annotations.next(),
+					distinctUriRefs);
+		}
+	}
+
+	/**
+	 * Replaces the correlation score of the annotation with that score minus
+	 * the number of references originating from the entity.
+	 * 
+	 * @param ea
+	 *            the entity annotation to update
+	 * @param allUriReferences
+	 *            number of distinct referenced URIs (not used by this method)
+	 */
+	public void performEntityReferenceDisambiguation(EntityAnnotation ea,
+			int allUriReferences) {
+		int rawScore = ea.getCorrelationScore();
+		int ownReferences = ea.getReferencesFromEntity();
+		ea.setCorrelationScore(rawScore - ownReferences);
+	}
+
+	/**
+	 * Calculates the disambiguated confidence of every collected entity
+	 * annotation and writes it to the metadata graph as fise:confidence,
+	 * adding this engine as a contributor of the annotation.
+	 * <p>
+	 * Nothing is written if no entity annotations or no correlation scores
+	 * were collected, because the minimum and maximum score needed by the
+	 * calculation do not exist in that case.
+	 * 
+	 * @param graph
+	 *            the metadata graph to update
+	 */
+	public void applyDisambiguationResults(MGraph graph) {
+		if (allEnitityAnnotations.isEmpty()
+				|| this.correlationScoresOfEntities.isEmpty()) {
+			// SortedSet.first()/last() throw NoSuchElementException on an
+			// empty set; the existing confidences are left untouched
+			log.debug("No correlation data collected, confidences are not modified");
+			return;
+		}
+		int max = this.correlationScoresOfEntities.last();
+		int min = this.correlationScoresOfEntities.first();
+
+		for (EntityAnnotation ea : allEnitityAnnotations.values()) {
+			// calculate total dc
+			ea.calculateFoafNameDisambiguatedConfidence();
+			ea.calculateEntityReferenceDisambiguatedConfidence(max, min);
+			ea.calculateDisambiguatedConfidence();
+			EnhancementEngineHelper.set(graph, ea.getUriLink(),
+					ENHANCER_CONFIDENCE, ea.getDisambiguatedConfidence(),
+					literalFactory);
+			// adding this engine as a contributor
+			EnhancementEngineHelper.addContributingEngine(graph,
+					ea.getUriLink(), this);
+		}
+	}
+
+	/**
+	 * Activates the component by delegating to the super class. An
+	 * {@link IOException} raised by the super class is logged and not
+	 * propagated.
+	 * 
+	 * @param ce
+	 *            the {@link ComponentContext}
+	 * @throws ConfigurationException
+	 *             if the super class rejects the configuration
+	 */
+	@Activate
+	protected void activate(ComponentContext ce) throws ConfigurationException {
+		try {
+			super.activate(ce);
+		} catch (IOException ioe) {
+			log.error("Error in activation method.", ioe);
+		}
+	}
+
+	/**
+	 * Deactivates the component by delegating to the super class.
+	 * 
+	 * @param ce
+	 *            the {@link ComponentContext} of the component being
+	 *            deactivated
+	 */
+	@Deactivate
+	protected void deactivate(ComponentContext ce) {
+		super.deactivate(ce);
+	}
+}

Propchange: stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/foaf/FOAFDisambiguationEngine.java
------------------------------------------------------------------------------
    svn:executable = *

Added: stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/test/java/org/apache/stanbol/AppTest.java
URL: http://svn.apache.org/viewvc/stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/test/java/org/apache/stanbol/AppTest.java?rev=1528907&view=auto
==============================================================================
--- stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/test/java/org/apache/stanbol/AppTest.java (added)
+++ stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/test/java/org/apache/stanbol/AppTest.java Thu Oct  3 15:13:35 2013
@@ -0,0 +1,38 @@
+package org.apache.stanbol;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+/**
+ * Placeholder JUnit 3 test case; it contains one test that always passes.
+ */
+public class AppTest extends TestCase {
+
+    /**
+     * Creates the test case.
+     *
+     * @param testName name of the test case
+     */
+    public AppTest(String testName) {
+        super(testName);
+    }
+
+    /**
+     * @return a suite containing all tests of this class
+     */
+    public static Test suite() {
+        return new TestSuite(AppTest.class);
+    }
+
+    /**
+     * Trivial test that always succeeds.
+     */
+    public void testApp() {
+        assertTrue(true);
+    }
+}

Propchange: stanbol/branches/disambiguation/enhancement-engines/foaf-disambiguation/src/test/java/org/apache/stanbol/AppTest.java
------------------------------------------------------------------------------
    svn:executable = *

Added: stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/LICENSE
URL: http://svn.apache.org/viewvc/stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/LICENSE?rev=1528907&view=auto
==============================================================================
--- stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/LICENSE (added)
+++ stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/LICENSE Thu Oct  3 15:13:35 2013
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [2013] [Antonio David Perez Morales]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
\ No newline at end of file

Added: stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/README.md
URL: http://svn.apache.org/viewvc/stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/README.md?rev=1528907&view=auto
==============================================================================
--- stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/README.md (added)
+++ stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/README.md Thu Oct  3 15:13:35 2013
@@ -0,0 +1,53 @@
+# Freebase Disambiguation Engine #
+
+The Freebase Disambiguation Engine is a Stanbol Enhancer Engine responsible for disambiguating entities based on the context in which they appear.
+This engine uses an algorithm based on minimum distances between entities in the Freebase Graph (generated using the [Freebase graph importer][1]) 
+
+The disambiguation algorithm should take into account a local disambiguation score (comparing in some way the document context with the contexts provided by the Wikilinks resource) and a global disambiguation score computed by a graph-based algorithm using the Freebase graph imported into a Neo4j database. Each disambiguation score would have a different weight in the final disambiguation score for each entity. The algorithm's steps, for each TextAnnotation, can be the following:
+
+1. Local score: for each EntityAnnotation, retrieve from the Wikilinks database all the contexts associated with the referenced entity. Compare (similarity, distance, ...) the mention context (selected-context) with the Wikilinks contexts.
+
+2. Global score: build a subgraph with all the possible entities and their relations in Freebase. Extract a set of possible solutions from that graph (note: a solution should include only one entity annotation for each text annotation). Compute the Dijkstra distance between each pair of entities belonging to a possible solution.
+
+3. Weights normalization and confidence values refinement. 
+
+## Freebase Stanbol Enhancer Engine ##
+
+This engine implements the above algorithm except for the first step (local score), which is not implemented in this version.
+
+The algorithm builds a subgraph from the whole Freebase graph only for the entities returned after the NLP and Entity linking process, and the relations between them.
+
+Using the Entity Annotations for each Text Annotation, it builds all the possible solutions for the text to enhance, that is, all the tuples that result from combining one entity from each text annotation's set of entity annotations.
+
+The searched solution is the tuple minimizing the distance in the graph between every pair of entities in the tuple. Minimal distance means higher disambiguation score.
+
+## How to use it ##
+
+In order to use the engine, do the following:  
+
+1. Download the code
+2. Run `'mvn clean package'` command
+3. In the *target/* directory, find the bundle called `gsoc-freebase-disambiguation-{version}.jar`
+4. Install it in Stanbol using the Felix Web Console
+
+**Note:** This bundle depends on `blueprints-core` and `blueprints-neo4j-graph`. You have to download the source code from the [Blueprints repository][2] and use the pom files located in the *src/main/resources* folder of this project to convert them into bundles and install them in Stanbol.
+
+## Configuration ##
+
+Once the bundle is deployed and active in Stanbol, go to the configuration tab in the Felix Web Console of Stanbol and configure the *FreebaseDisambiguatorEngine*:
+* Name of the engine: default value is **freebase-disambiguation**
+* Neo4j graph location: default value is empty. You must set the location of the graph and restart the component in the *Component* tab.
+
+The last step is to configure a new engine in the enhancement chain using the name set in the configuration (freebase-disambiguator).
+
+## Jira ##
+
+This tool is related to the [issue 1157](https://issues.apache.org/jira/browse/STANBOL-1157) of Stanbol Jira.  
+
+## License
+
+GSoC Freebase Disambiguation Engine is distributed under the terms of the [Apache License, 2.0](http://www.apache.org/licenses/LICENSE-2.0.html).
+
+[1]: https://github.com/adperezmorales/gsoc-freebase-graph-importer/tree/master/gsoc-freebase-graph-importer
+[2]: https://github.com/tinkerpop/blueprints
+

Added: stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/pom.xml?rev=1528907&view=auto
==============================================================================
--- stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/pom.xml (added)
+++ stanbol/branches/disambiguation/enhancement-engines/freebase-disambiguation/pom.xml Thu Oct  3 15:13:35 2013
@@ -0,0 +1,221 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+	<groupId>com.gsoc.freebase</groupId>
+	<artifactId>gsoc-freebase-disambiguation-engine</artifactId>
+	<version>1.0-SNAPSHOT</version>
+	<name>Freebase Disambiguation Engine</name>
+	<description>Freebase Disambiguation Engine which makes use of the Graph generated by the gsoc-freebase-importer to disambiguate entities in the Stanbol Enhancer Chain</description>
+
+	<!-- Developers -->
+	<developers>
+		<developer>
+			<name>Antonio David Perez Morales</name>
+			<email>adperezmorales@gmail.com</email>
+			<id>adpm</id>
+		</developer>
+	</developers>
+
+	<!-- Properties -->
+	<properties>
+		<!-- Release version used for the Stanbol enhancer servicesapi and test artifacts -->
+		<stanbol.version>0.10.0</stanbol.version>
+		<!-- Snapshot version used for the disambiguation.mlt engine dependency -->
+		<stanbol.snapshotversion>0.10.0-SNAPSHOT</stanbol.snapshotversion>
+		<!-- Version of the org.apache.felix.scr.annotations dependency -->
+		<felix.version>1.6.0</felix.version>
+		<!-- Version of the com.tinkerpop.blueprints artifacts -->
+		<blueprints.version>2.3.0</blueprints.version>
+	</properties>
+
+	<!-- Dependencies -->
+	<dependencies>
+		<!-- Stanbol dependencies -->
+		<dependency>
+			<groupId>org.apache.stanbol</groupId>
+			<artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+			<version>${stanbol.version}</version>
+		</dependency>
+		<!-- NOTE(review): no scope is declared, so this test-support artifact is
+			resolved with the default (compile) scope; confirm whether test scope
+			was intended -->
+		<dependency>
+			<groupId>org.apache.stanbol</groupId>
+			<artifactId>org.apache.stanbol.enhancer.test</artifactId>
+			<version>${stanbol.version}</version>
+		</dependency>
+		<!-- Provided at runtime by the container; pinned to the snapshot version -->
+		<dependency>
+			<groupId>org.apache.stanbol</groupId>
+			<artifactId>org.apache.stanbol.enhancer.engine.disambiguation.mlt</artifactId>
+			<version>${stanbol.snapshotversion}</version>
+			<scope>provided</scope>
+		</dependency>
+
+		<!-- Felix SCR annotations, only needed at build time -->
+		<dependency>
+			<groupId>org.apache.felix</groupId>
+			<artifactId>org.apache.felix.scr.annotations</artifactId>
+			<version>${felix.version}</version>
+			<scope>provided</scope>
+		</dependency>
+
+		<!-- TinkerPop Blueprints dependencies; all three share ${blueprints.version}
+			so the artifacts cannot drift apart -->
+		<dependency>
+			<groupId>com.tinkerpop.blueprints</groupId>
+			<artifactId>blueprints-core</artifactId>
+			<version>${blueprints.version}</version>
+			<scope>compile</scope>
+		</dependency>
+
+		<dependency>
+			<groupId>com.tinkerpop.blueprints</groupId>
+			<artifactId>blueprints-neo4j-graph</artifactId>
+			<version>${blueprints.version}</version>
+			<scope>compile</scope>
+		</dependency>
+
+		<dependency>
+			<groupId>com.tinkerpop.blueprints</groupId>
+			<artifactId>blueprints-graph-jung</artifactId>
+			<version>${blueprints.version}</version>
+			<scope>compile</scope>
+		</dependency>
+
+		<!-- Guava Dependency -->
+		<dependency>
+			<groupId>com.google.guava</groupId>
+			<artifactId>guava</artifactId>
+			<version>14.0.1</version>
+		</dependency>
+	</dependencies>
+
+	<build>
+		<plugins>
+			<!-- Packages the jar with the manifest found under the build output
+				directory (presumably the one written by the maven-bundle-plugin
+				'manifest' goal; confirm). The groupId must be
+				org.apache.maven.plugins, otherwise this configuration is not applied
+				to the JAR plugin that actually runs. -->
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-jar-plugin</artifactId>
+				<version>2.4</version>
+				<configuration>
+					<archive>
+						<manifestFile>${project.build.outputDirectory}/META-INF/MANIFEST.MF</manifestFile>
+					</archive>
+				</configuration>
+			</plugin>
+			<!-- OSGi bundle packaging. No version is given here; the version and the
+				shared instructions/executions are declared for this plugin in the
+				pluginManagement section of this POM. -->
+			<plugin>
+				<groupId>org.apache.felix</groupId>
+				<artifactId>maven-bundle-plugin</artifactId>
+				<extensions>true</extensions>
+				<configuration>
+					<instructions>
+						<!-- Enable this to include your enhancement chain configuration:
+							<Install-Path>config</Install-Path> -->
+						<!-- Export every package whose name starts with the freebase engine
+							package, versioned with the project version -->
+						<Export-Package>
+							org.apache.stanbol.enhancer.engine.disambiguation.freebase*;version=${project.version}
+						</Export-Package>
+						<!-- Guava collections are imported with an explicit version attribute;
+							every other import is marked optional -->
+						<Import-Package>com.google.common.collect;version="13.0.1",
+							*;resolution:="optional"</Import-Package>
+						<!-- Disabled: embedding of compile-scope dependencies into the bundle.
+							<Embed-Dependency>*;scope=compile;inline=true;artifactId=!neo4j|!org.codehaus|!org.apache.stanbol.enhancer.engine.disambiguation.mlt</Embed-Dependency>
+							<Embed-Directory>lib/</Embed-Directory> <Embed-Transitive>true</Embed-Transitive>
+							<Include-Resource>{maven-dependencies}</Include-Resource> -->
+					</instructions>
+				</configuration>
+			</plugin>
+			<!-- Runs the 'scr' goal of the Felix SCR plugin. No version is given
+				here; it is declared in the pluginManagement section, which also
+				declares an execution with the same id and configuration. -->
+			<plugin>
+				<groupId>org.apache.felix</groupId>
+				<artifactId>maven-scr-plugin</artifactId>
+				<executions>
+					<execution>
+						<id>generate-scr-scrdescriptor</id>
+						<goals>
+							<goal>scr</goal>
+						</goals>
+						<configuration>
+							<properties>
+								<!-- Passed to the plugin as the service.vendor property -->
+								<service.vendor>gsoc-freebase-disambiguation-engine</service.vendor>
+							</properties>
+						</configuration>
+					</execution>
+				</executions>
+			</plugin>
+		</plugins>
+
+		<!-- Plugins Management -->
+		<pluginManagement>
+			<plugins>
+				<!-- Managed defaults for the bundle plugin: version, common bundle
+					headers and two executions (manifest generation and bundle
+					packaging) -->
+				<plugin>
+					<groupId>org.apache.felix</groupId>
+					<artifactId>maven-bundle-plugin</artifactId>
+					<version>2.3.7</version>
+					<inherited>true</inherited>
+					<configuration>
+						<archive>
+							<manifestFile>${project.build.outputDirectory}/META-INF/MANIFEST.MF</manifestFile>
+						</archive>
+						<instructions>
+							<Bundle-DocURL>http://stanbol.apache.org</Bundle-DocURL>
+							<Bundle-Vendor>gsoc</Bundle-Vendor>
+							<Bundle-SymbolicName>${project.artifactId}</Bundle-SymbolicName>
+							<!-- bnd version-policy macro; the doubled '$' presumably keeps Maven
+								from interpolating it (TODO confirm) -->
+							<_versionpolicy>$${version;===;${@}}</_versionpolicy>
+						</instructions>
+					</configuration>
+					<executions>
+						<!-- 'manifest' goal bound to the process-classes phase -->
+						<execution>
+							<id>bundle-manifest</id>
+							<phase>process-classes</phase>
+							<goals>
+								<goal>manifest</goal>
+							</goals>
+						</execution>
+						<!-- 'bundle' goal bound to the package phase -->
+						<execution>
+							<id>bundle-bundle</id>
+							<phase>package</phase>
+							<goals>
+								<goal>bundle</goal>
+							</goals>
+						</execution>
+					</executions>
+				</plugin>
+				<!-- Managed version and 'scr' execution for the Felix SCR plugin -->
+				<plugin>
+					<groupId>org.apache.felix</groupId>
+					<artifactId>maven-scr-plugin</artifactId>
+					<version>1.7.4</version>
+					<executions>
+						<execution>
+							<id>generate-scr-scrdescriptor</id>
+							<goals>
+								<goal>scr</goal>
+							</goals>
+							<configuration>
+								<properties>
+									<service.vendor>gsoc-freebase-disambiguation-engine</service.vendor>
+								</properties>
+							</configuration>
+						</execution>
+					</executions>
+				</plugin>
+
+				<!-- This plugin's configuration is used to store Eclipse m2e settings
+					only. It has no influence on the Maven build itself. The mapping
+					below tells m2e to ignore the maven-scr-plugin 'scr' goal for
+					plugin versions 1.7.4 and later. -->
+				<plugin>
+					<groupId>org.eclipse.m2e</groupId>
+					<artifactId>lifecycle-mapping</artifactId>
+					<version>1.0.0</version>
+					<configuration>
+						<lifecycleMappingMetadata>
+							<pluginExecutions>
+								<pluginExecution>
+									<pluginExecutionFilter>
+										<groupId>org.apache.felix</groupId>
+										<artifactId>
+											maven-scr-plugin
+										</artifactId>
+										<versionRange>
+											[1.7.4,)
+										</versionRange>
+										<goals>
+											<goal>scr</goal>
+										</goals>
+									</pluginExecutionFilter>
+									<action>
+										<ignore></ignore>
+									</action>
+								</pluginExecution>
+							</pluginExecutions>
+						</lifecycleMappingMetadata>
+					</configuration>
+				</plugin>
+			</plugins>
+		</pluginManagement>
+		<!-- End Plugins Management -->
+	</build>
+</project>
\ No newline at end of file