You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2013/02/19 06:14:19 UTC
svn commit: r1447589 - in
/stanbol/trunk/enhancement-engines/disambiguation-mlt: ./ src/ src/main/
src/main/java/ src/main/java/org/ src/main/java/org/apache/
src/main/java/org/apache/stanbol/
src/main/java/org/apache/stanbol/enhancer/ src/main/java/or...
Author: rwesten
Date: Tue Feb 19 05:14:18 2013
New Revision: 1447589
URL: http://svn.apache.org/r1447589
Log:
STANBOL-941: Moved the disambiguation-mlt engine to the trunk. No functional changes only adapted dependencies to released versions
Added:
stanbol/trunk/enhancement-engines/disambiguation-mlt/ (with props)
stanbol/trunk/enhancement-engines/disambiguation-mlt/README.md (with props)
stanbol/trunk/enhancement-engines/disambiguation-mlt/pom.xml (with props)
stanbol/trunk/enhancement-engines/disambiguation-mlt/src/
stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/
stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/
stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/
stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/
stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/
stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/
stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/
stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/
stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/
stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/DisambiguationData.java (with props)
stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/DisambiguatorEngine.java (with props)
stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/SavedEntity.java (with props)
stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/Suggestion.java (with props)
stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/resources/
stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/resources/OSGI-INF/
stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/resources/OSGI-INF/metatype/
stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/resources/OSGI-INF/metatype/metatype.properties (with props)
Propchange: stanbol/trunk/enhancement-engines/disambiguation-mlt/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Tue Feb 19 05:14:18 2013
@@ -0,0 +1,7 @@
+target
+
+.classpath
+
+.settings
+
+.project
Added: stanbol/trunk/enhancement-engines/disambiguation-mlt/README.md
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/disambiguation-mlt/README.md?rev=1447589&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/disambiguation-mlt/README.md (added)
+++ stanbol/trunk/enhancement-engines/disambiguation-mlt/README.md Tue Feb 19 05:14:18 2013
@@ -0,0 +1,29 @@
+README
+=======
+To install this Engine.
+Maven should be installed.
+
+ dir/: the directory where the code of this engine is located
+
+1. Have a Stanbol running.
+2. Open terminal and go to /dir/.
+3. run: mvn clean compile install
+4. Open a browser and open link http://localhost:8080/system/console/bundles .
+5. then click install/Update button
+6. In the pop-up, check "start bundle", click on browse, go to the directory and select /dir/target/org.apache.stanbol.enhancer.engine.disambiguation.mlt-0.10.0-SNAPSHOT.jar
+
+you can see that entity disambiguation engine is installed.
+
+
+TEST
+------------------------
+
+Before Installation of Engine if you input text
+
+"Paris is a small city in the state of Texas".
+
+It identifies Texas as state of US and Paris as Paris, France.
+
+
+When the Entity disambiguation engine processes this text, it correctly identifies Paris as Paris, Texas.
+
Propchange: stanbol/trunk/enhancement-engines/disambiguation-mlt/README.md
------------------------------------------------------------------------------
svn:executable = *
Added: stanbol/trunk/enhancement-engines/disambiguation-mlt/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/disambiguation-mlt/pom.xml?rev=1447589&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/disambiguation-mlt/pom.xml (added)
+++ stanbol/trunk/enhancement-engines/disambiguation-mlt/pom.xml Tue Feb 19 05:14:18 2013
@@ -0,0 +1,86 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <!-- Build configuration is inherited from the enhancement-engines reactor POM one directory up -->
+  <parent>
+    <groupId>org.apache.stanbol</groupId>
+    <artifactId>apache-stanbol-enhancement-engines</artifactId>
+    <version>0.10.0-SNAPSHOT</version>
+    <relativePath>..</relativePath>
+  </parent>
+  <groupId>org.apache.stanbol</groupId>
+  <artifactId>org.apache.stanbol.enhancer.engine.disambiguation.mlt</artifactId>
+  <version>0.10.0-SNAPSHOT</version>
+  <packaging>bundle</packaging>
+
+  <name>Apache Stanbol Enhancer Enhancement Engine : Disambiguation using Solr MLT</name>
+  <description>
+    Entity Disambiguation Enhancement Engine that performs Similarity queries on
+    the Stanbol Entityhub to re-rank suggested Entities.
+  </description>
+
+  <inceptionYear>2012</inceptionYear>
+
+  <!-- The SCM location has to match the directory this module lives in: trunk/enhancement-engines/ -->
+  <scm>
+    <connection>scm:svn:http://svn.apache.org/repos/asf/stanbol/trunk/enhancement-engines/disambiguation-mlt/</connection>
+    <developerConnection>scm:svn:https://svn.apache.org/repos/asf/stanbol/trunk/enhancement-engines/disambiguation-mlt/</developerConnection>
+    <url>http://stanbol.apache.org/</url>
+  </scm>
+
+  <build>
+    <plugins>
+      <!-- Creates the OSGi bundle and exports the packages of this engine -->
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-bundle-plugin</artifactId>
+        <extensions>true</extensions>
+        <configuration>
+          <instructions>
+            <Export-Package>
+              org.apache.stanbol.enhancer.engine.disambiguation.mlt.*;version=${project.version}
+            </Export-Package>
+            <!-- intentionally empty: no dependencies are embedded in the bundle -->
+            <Embed-Dependency>
+            </Embed-Dependency>
+          </instructions>
+        </configuration>
+      </plugin>
+      <!-- Processes the Felix SCR annotations (@Component, @Service, @Reference, ...) -->
+      <plugin>
+        <groupId>org.apache.felix</groupId>
+        <artifactId>maven-scr-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.commons.stanboltools.offline</artifactId>
+      <version>0.11.0</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+      <version>0.10.0</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.entityhub.servicesapi</artifactId>
+      <version>0.11.0</version>
+    </dependency>
+    <!-- No version given for the following two: presumably managed by the parent POM (verify) -->
+    <dependency>
+      <groupId>org.apache.felix</groupId>
+      <artifactId>org.apache.felix.scr.annotations</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-lang</groupId>
+      <artifactId>commons-lang</artifactId>
+    </dependency>
+
+  </dependencies>
+</project>
Propchange: stanbol/trunk/enhancement-engines/disambiguation-mlt/pom.xml
------------------------------------------------------------------------------
svn:executable = *
Propchange: stanbol/trunk/enhancement-engines/disambiguation-mlt/pom.xml
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/DisambiguationData.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/DisambiguationData.java?rev=1447589&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/DisambiguationData.java (added)
+++ stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/DisambiguationData.java Tue Feb 19 05:14:18 2013
@@ -0,0 +1,121 @@
+package org.apache.stanbol.enhancer.engine.disambiguation.mlt;
+
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.Set;
+import java.util.TreeMap;
+
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
+
+/**
+ * Collects all data needed for Disambiguation
+ *
+ * @author Rupert Westenthaler
+ * @author Kritarth
+ *
+ */
+public class DisambiguationData {
+
+    /**
+     * Instances are only created by {@link #createFromContentItem(ContentItem)}.
+     */
+    private DisambiguationData() {}
+
+    /**
+     * Maps the URI of a fise:EntityAnnotation (key) to the URIs of all fise:TextAnnotations it is a
+     * suggestion for (value).
+     * <p>
+     * Used when the disambiguation results are written back to the enhancement structure, to decide
+     * whether a fise:EntityAnnotation needs to be cloned or not.
+     */
+    public Map<UriRef,Set<UriRef>> suggestionMap = new HashMap<UriRef,Set<UriRef>>();
+
+    /**
+     * The {@link SavedEntity} of each usable fise:TextAnnotation, keyed by the center position
+     * <code>(start + end) / 2</code> of its selected text.
+     * <p>
+     * Intended for fast position based lookups of neighbouring TextAnnotations when building the
+     * context for a disambiguation. An entry is replaced if a later TextAnnotation has the same center.
+     */
+    public NavigableMap<Integer,SavedEntity> directoryTextAnotation = new TreeMap<Integer,SavedEntity>();
+
+    /**
+     * The 'fise:selected-text' values of all 'fise:TextAnnotations', including those that are NOT
+     * contained in {@link #textAnnotations} (e.g. because some required data are missing).
+     */
+    public Collection<String> allSelectedTexts = new HashSet<String>();
+
+    /**
+     * All fise:TextAnnotations that can be used for disambiguation: the URI of the annotation as key
+     * and the {@link SavedEntity} holding the extracted information as value.
+     */
+    public Map<UriRef,SavedEntity> textAnnotations = new HashMap<UriRef,SavedEntity>();
+
+    /**
+     * Collects the disambiguation data from the metadata of the parsed ContentItem.
+     * <p>
+     * Every resource typed as fise:TextAnnotation is converted to a {@link SavedEntity}. If the
+     * conversion succeeds the entity is registered in {@link #directoryTextAnotation},
+     * {@link #suggestionMap}, {@link #textAnnotations} and {@link #allSelectedTexts}. Otherwise only
+     * its selected text (if present) is added to {@link #allSelectedTexts}.
+     *
+     * @param ci
+     *            the content item providing the metadata graph
+     * @return the collected data
+     */
+    public static DisambiguationData createFromContentItem(ContentItem ci) {
+        MGraph metadata = ci.getMetadata();
+        DisambiguationData collected = new DisambiguationData();
+        Iterator<Triple> annotations =
+                metadata.filter(null, RDF_TYPE, TechnicalClasses.ENHANCER_TEXTANNOTATION);
+        while (annotations.hasNext()) {
+            UriRef annotationUri = (UriRef) annotations.next().getSubject();
+            // TODO: rwesten: do we really want to ignore fise:TextAnnotations that link to
+            // an other one via dc:relation (typically two TextAnnotations that select the exact
+            // same text)? Currently such annotations are NOT ignored.
+
+            SavedEntity entity = SavedEntity.createFromTextAnnotation(metadata, annotationUri);
+            if (entity == null) {
+                // Not usable for disambiguation; the selected text is still remembered because
+                // it is needed regardless of whether the annotation has suggestions or not.
+                String text =
+                        EnhancementEngineHelper.getString(metadata, annotationUri, ENHANCER_SELECTED_TEXT);
+                if (text != null) {
+                    collected.allSelectedTexts.add(text);
+                }
+                continue;
+            }
+
+            Integer center = Integer.valueOf((entity.getStart() + entity.getEnd()) / 2);
+            collected.directoryTextAnotation.put(center, entity);
+
+            // register this TextAnnotation for every EntityAnnotation suggested for it
+            for (Suggestion suggestion : entity.getSuggestions()) {
+                UriRef entityAnnotation = suggestion.getEntityAnnotation();
+                Set<UriRef> linked = collected.suggestionMap.get(entityAnnotation);
+                if (linked == null) {
+                    linked = new HashSet<UriRef>();
+                    collected.suggestionMap.put(entityAnnotation, linked);
+                }
+                linked.add(entity.getUri());
+            }
+
+            // NOTE (rwesten): the SavedEntity itself holds the list of its suggestions
+            collected.textAnnotations.put(annotationUri, entity);
+            collected.allSelectedTexts.add(entity.getName());
+        }
+        return collected;
+    }
+}
Propchange: stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/DisambiguationData.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/DisambiguatorEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/DisambiguatorEngine.java?rev=1447589&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/DisambiguatorEngine.java (added)
+++ stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/DisambiguatorEngine.java Tue Feb 19 05:14:18 2013
@@ -0,0 +1,866 @@
+/*
+ * Copyright 2012, FORMCEPT [http://www.formcept.com]
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engine.disambiguation.mlt;
+
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDFS_LABEL;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Dictionary;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.NavigableMap;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.commons.lang.StringUtils;
+import org.apache.felix.scr.annotations.Activate;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Deactivate;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Reference;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
+import org.apache.stanbol.entityhub.servicesapi.defaults.SpecialFieldEnum;
+import org.apache.stanbol.entityhub.servicesapi.model.Entity;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.Text;
+import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
+import org.apache.stanbol.entityhub.servicesapi.query.Constraint;
+import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
+import org.apache.stanbol.entityhub.servicesapi.query.QueryResultList;
+import org.apache.stanbol.entityhub.servicesapi.query.SimilarityConstraint;
+import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint;
+import org.apache.stanbol.entityhub.servicesapi.site.Site;
+import org.apache.stanbol.entityhub.servicesapi.site.SiteException;
+import org.apache.stanbol.entityhub.servicesapi.site.SiteManager;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Disambiguation Engine using Entityhub {@link SimilarityConstraint}s to disambiguate between existing
+ * fise:EntityAnnotations for fise:TextAnnotations.
+ * <p>
+ * <b>TODOs</b>:
+ * <ul>
+ * <li>Configurations: currently all configurations is set to the defaults
+ * <li>Context: test and improve different ways to determine the context used for disambiguation.
+ * <li>URI based similarity: currently only full text similarity is used. However it would also be possible to
+ * use the {@link SpecialFieldEnum#references} field to disambiguate based on URIs of already suggested
+ * Entities.
+ * </ul>
+ *
+ * @author Kritarth Anand
+ * @author Rupert Westenthaler
+ */
+@Component(immediate = true, metatype = true)
+@Service
+@Properties(value = {@Property(name = EnhancementEngine.PROPERTY_NAME, value = "disambiguation-mlt")})
+public class DisambiguatorEngine extends AbstractEnhancementEngine<IOException,RuntimeException> implements
+ EnhancementEngine, ServiceProperties {
+
+ /**
+ * The logger of this engine
+ */
+ private static Logger log = LoggerFactory.getLogger(DisambiguatorEngine.class);
+
+ /**
+ * Service URL
+ * <p>
+ * NOTE(review): this field is not referenced in the part of the class reviewed here - confirm it is
+ * still needed.
+ */
+ private String serviceURL;
+
+ /**
+ * The default value for the execution order of this Engine. Currently set to
+ * {@link ServiceProperties#ORDERING_POST_PROCESSING} - 90.
+ * <p>
+ * NOTE(review): this comment previously stated "+ 90" and that the value ensures the engine runs as
+ * one of the first engines of the post-processing phase. The code subtracts 90 - confirm which of
+ * the two is intended.
+ */
+ public static final Integer defaultOrder = ServiceProperties.ORDERING_POST_PROCESSING - 90;
+ /**
+ * The plain text might be required for determining the extraction context
+ */
+ public static final String PLAIN_TEXT_MIMETYPE = "text/plain";
+ /**
+ * Contains the only supported mime type {@link #PLAIN_TEXT_MIMETYPE}
+ */
+ public static final Set<String> SUPPORTED_MIMETYPES = Collections.singleton(PLAIN_TEXT_MIMETYPE);
+
+ /**
+ * Used to lookup the Entityhub {@link Site} used to perform the disambiguation.
+ */
+ @Reference
+ protected SiteManager siteManager;
+
+ /*
+ * The following parameters describe the ratio of the original fise:confidence values and the
+ * disambiguation scores contributing to the final disambiguated fise:confidence
+ *
+ * TODO: make configurable
+ */
+ /**
+ * Default ratio for the disambiguation score (2.0)
+ */
+ public static final double DEFAULT_DISAMBIGUATION_RATIO = 2.0;
+ /**
+ * Default ratio for the original fise:confidence of suggested entities (1.0)
+ */
+ public static final double DEFAULT_CONFIDNECE_RATIO = 1.0;
+
+ /**
+ * The weight for disambiguation scores <code>:= disRatio/(disRatio+confRatio)</code>.
+ * With the default ratios this is 2/3.
+ */
+ private double disambiguationWeight = DEFAULT_DISAMBIGUATION_RATIO
+ / (DEFAULT_DISAMBIGUATION_RATIO + DEFAULT_CONFIDNECE_RATIO);
+ /**
+ * The weight for the original confidence scores <code>:= confRatio/(disRatio+confRatio)</code>.
+ * With the default ratios this is 1/3.
+ */
+ private double confidenceWeight = DEFAULT_CONFIDNECE_RATIO
+ / (DEFAULT_DISAMBIGUATION_RATIO + DEFAULT_CONFIDNECE_RATIO);
+
+ /**
+ * The {@link LiteralFactory} used to create typed RDF literals
+ */
+ private final LiteralFactory literalFactory = LiteralFactory.getInstance();
+
+ /**
+ * Returns the properties containing the {@link ServiceProperties#ENHANCEMENT_ENGINE_ORDERING}
+ */
+ @Override
+ public Map<String,Object> getServiceProperties() {
+ return Collections.unmodifiableMap(Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING,
+ (Object) defaultOrder));
+ }
+
+ /**
+ * Checks if the parsed ContentItem provides text that can be processed by this engine.
+ * <p>
+ * The text of the blob is read only once and then checked for being <code>null</code> or blank.
+ *
+ * @param ci
+ *            the content item to check
+ * @return {@link #CANNOT_ENHANCE} if the text of the blob is <code>null</code> or contains only
+ *         whitespace, otherwise {@link #ENHANCE_SYNCHRONOUS}
+ * @throws EngineException
+ *             an {@link InvalidContentException} if reading the text of the blob fails
+ */
+ @Override
+ public int canEnhance(ContentItem ci) throws EngineException {
+     final String text;
+     try {
+         // read the content a single time instead of once per condition
+         text = ContentItemHelper.getText(ci.getBlob());
+     } catch (IOException e) {
+         log.error("Failed to get the text for " + "enhancement of content: " + ci.getUri(), e);
+         throw new InvalidContentException(this, ci, e);
+     }
+     if (text == null || text.trim().isEmpty()) {
+         return CANNOT_ENHANCE;
+     }
+     // default enhancement is synchronous enhancement
+     return ENHANCE_SYNCHRONOUS;
+ }
+
+ /*
+ * This function first evaluates all the possible ambiguations of each text annotation detected. the text
+ * of all entities detected is used for making a Dbpedia query with all string for MLT that contain all
+ * the other entities. The results obtained are used to calcualte new confidence values which are updated
+ * in the metadata.
+ */
+ @Override
+ public void computeEnhancements(ContentItem ci) throws EngineException {
+
+ String textContent;
+ Entry<UriRef,Blob> textBlob = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
+ if (textBlob != null) {
+ try {
+ textContent = ContentItemHelper.getText(textBlob.getValue());
+ } catch (IOException e) {
+ log.warn("Unable to retieve plain text content for ContentItem " + ci.getUri(), e);
+ textContent = null;
+ }
+ } else {
+ textContent = null;
+ }
+
+ MGraph graph = ci.getMetadata();
+
+ // (1) read the data from the content item
+ String contentLangauge;
+ DisambiguationData disData;
+ ci.getLock().readLock().lock();
+ try {
+ contentLangauge = EnhancementEngineHelper.getLanguage(ci);
+ // NOTE (rwesten): moved the parsing of the information from the
+ // contentItem to static method of the Class holding those information
+ // (similar as it already was for SavedEntity)
+ // readEntities(loseConfidence, allEntities, textAnnotations, graph);
+ disData = DisambiguationData.createFromContentItem(ci);
+ } finally {
+ ci.getLock().readLock().unlock();
+ }
+
+ // (2) Disambiguate the SavedEntities
+ for (SavedEntity savedEntity : disData.textAnnotations.values()) {
+ if (savedEntity.getSuggestions().size() <= 1) {
+ // we need not to disambiguate if only one suggestion is present
+ continue;
+ }
+ // NOTE: the site is determined from the
+ // fise:TextAnnotation <-- dc:relation --
+ // fise:EntityAnnotation -- entityhub:ste --> "{siteName}"^^xsd:string
+ // data.
+ // TODO: add configuration to include/exclude Sites by name
+ Site site = siteManager.getSite(savedEntity.getSite());
+ Collection<String> types = null; // potential types of entities
+ boolean casesensitive = false; // TODO: make configurable
+ String savedEntityLabel =
+ casesensitive ? savedEntity.getName() : savedEntity.getName().toLowerCase();
+
+ // Determine the context used for disambiguation
+ // TODO: make this configurable options
+
+ String disambiguationContext;
+ // (0.a) The easiest way is to just use the selection context
+ // disambiguationContext = savedEntity.getContext();
+ // (0.b) Calculate a context based on a moving window
+ String window =
+ getDisambiguationContext(textContent, savedEntity.getName(), savedEntity.getStart(), 100);
+ log.info("Use Window: '{}' for '{}'", window, savedEntity.getName());
+
+ // (1) The contextSelections:
+ // All other selected text within the selection context
+ List<String> contextSelections =
+ getSelectionsInContext(savedEntity.getName(), disData.allSelectedTexts, window);
+ // savedEntity.getContext());
+ disambiguationContext = unionString(false, contextSelections);
+
+ // (2) I do not understand this variant (see comment for the
+ // EntitiesInRange(..) method
+ // List<String> L = EntitiesInRange(disData.directoryTextAnotation,
+ // (savedEntity.getStart() + savedEntity.getEnd()) / 2);
+ // disambiguationContext = unionString(false,contextSelections);
+
+ // (3) one can build a combination of the above
+ // disambiguationContext = unionString(true, //unique adds
+ // Collections.singleton(savedEntity.getName()), //the selected text
+ // Collections.singleton(context), //the context
+ // contextSelections); //other selected parsed in the context
+
+ // or just the name of the entity AND the context
+ // disambiguationContext = unionString(false,
+ // Collections.singleton(savedEntity.getName()),
+ // contextSelections);
+
+ // (4) TODO: I would also like to have the possibility to disambiguate
+ // using URIs of Entities suggested for other TextAnnotations
+ // within the context.
+
+ // make the similarity query on the Entityhub using the collected
+ // information
+ QueryResultList<Entity> results;
+ log.info(" - Query '{}' for {}@{} with context '{}'", new Object[] {site.getId(),
+ savedEntityLabel, contentLangauge, disambiguationContext});
+ if (!StringUtils.isBlank(disambiguationContext)) {
+ try {
+ results = query(site, savedEntityLabel, contentLangauge, disambiguationContext);
+ } catch (SiteException e) {
+ // TODO we could also try to catch those errors ...
+ throw new EngineException("Unable to disambiguate Mention of '" + savedEntity.getName()
+ + "' on Entityhub Site '" + site.getId() + "!", e);
+ }
+ log.debug(" - {} results returned by query {}", results.size(), results.getQuery());
+ // match the results with the suggestions
+ disambiguateSuggestions(results, savedEntity);
+ } else {
+ log.debug(" - not disambiguated because of empty context!");
+ }
+ }
+ // (3) Write back the Results of the Disambiguation process
+ // NOTE (rwesten): In the original version of Kritarth this was done as
+ // part of (2) - disambiguation. This is now changed as in (2) the
+ // disambiguation results are stored in the Suggestions and only
+ // applied to the EnhancementStructure in (3). This allows to reduce the
+ // coverage of the wirte lock needed to be applied to the ContentItem.
+ ci.getLock().writeLock().lock();
+ try {
+ applyDisambiguationResults(graph, disData);
+ } finally {
+ ci.getLock().writeLock().unlock();
+ }
+ }
+
+ /**
+ * Builds and executes the similarity query used for disambiguation on the parsed Site.
+ * <p>
+ * The label of the mention (if not <code>null</code> or empty) is set as a case insensitive text
+ * constraint on rdfs:label. The extraction context is always set as {@link SimilarityConstraint}
+ * on the full text field. The query is limited to 25 results.
+ *
+ * @param dbpediaSite
+ *            the site to query
+ * @param savedEntityLabel
+ *            the label of the mention; if <code>null</code> or empty only the similarity
+ *            constraint is used
+ * @param language
+ *            the language of the label or <code>null</code> if not known
+ * @param extractionContext
+ *            the text used for the similarity constraint
+ * @return the results of the query
+ * @throws SiteException
+ *             if the site fails to execute the query
+ */
+ protected QueryResultList<Entity> query(Site dbpediaSite, String savedEntityLabel, String language,
+         String extractionContext) throws SiteException {
+     FieldQuery similarityQuery = dbpediaSite.getQueryFactory().createFieldQuery();
+
+     boolean labelMissing = savedEntityLabel == null || savedEntityLabel.isEmpty();
+     if (labelMissing) {
+         log.warn("parsed label {} was empty or NULL. Will use Similarity constraint only!",
+             savedEntityLabel);
+     } else {
+         Constraint byLabel = language == null
+                 ? new TextConstraint(savedEntityLabel, false)
+                 : new TextConstraint(savedEntityLabel, false, language, null);
+         // TODO: what happens if a recommendation was not based on rdfs:label?
+         similarityQuery.setConstraint(RDFS_LABEL.getUnicodeString(), byLabel);
+     }
+
+     SimilarityConstraint byContext = new SimilarityConstraint(extractionContext);
+     similarityQuery.setConstraint(SpecialFieldEnum.fullText.getUri(), byContext);
+     similarityQuery.setLimit(25);
+
+     return dbpediaSite.findEntities(similarityQuery);
+ }
+
+ /*
+ * If for an entity the Dbpedia query results in suggestion none of which match the already present
+ * ambiguations, we go with the ambiguations found earlier that is the ones we have with.
+ */
+ // NOTE (rwesten): The disambiguateSuggestions now reduces confidence
+ // values of Suggestions that are not within the disambiguation result
+ // by the #confidenceWeight. So if not a single suggestion do match with
+ // the disambiguation result the ambiguation is kept but the overall
+ // fise:confidence values are reduced by #confidenceWeight (ensured to be
+ // less than 1)
+ // protected List<Triple> unchangedConfidences(List<UriRef> subsumed,
+ // MGraph graph,
+ // List<Triple> loseConfidence) {
+ // for (int i = 0; i < subsumed.size(); i++) {
+ // UriRef uri = subsumed.get(i);
+ // Iterator<Triple> confidenceTriple = graph.filter(uri, ENHANCER_CONFIDENCE, null);
+ // while (confidenceTriple.hasNext()) {
+ // loseConfidence.remove(confidenceTriple.next());
+ // }
+ // }
+ // return loseConfidence;
+ // }
+
+ /**
+ * Applies the disambiguation results to the suggestions of the {@link SavedEntity}.
+ * <p>
+ * This method modifies the state of the {@link SavedEntity#getSuggestions()}
+ * <p>
+ * <b>Algorithm</b>: the original confidence of a suggestion is combined with the score of the
+ * disambiguation result.
+ * <ul>
+ * <li>Parameters: the ratio '{dr}:{cr}' of the disambiguation score and the original
+ * fise:confidence (defaults {@link #DEFAULT_DISAMBIGUATION_RATIO}:{@link #DEFAULT_CONFIDNECE_RATIO}).
+ * The disambiguation weight <code>dw := dr/(dr+cr)</code> and the confidence weight
+ * <code>cw := cr/(dr+cr)</code> are pre-calculated in the fields #disambiguationWeight and
+ * #confidenceWeight.
+ * <li>Inputs: the original confidence <code>c</code> of a suggestion (<code>0</code> if missing), the
+ * score <code>s</code> of the disambiguation result and the reference score <code>ms</code>, which is
+ * the score of the first result that matches a suggestion.
+ * <li>Output: normalized score <code>ns := s/ms</code> and the disambiguated confidence
+ * <code>dc := c*cw + ns*dw</code>.
+ * </ul>
+ * <code>ns</code> (and therefore <code>dc</code>) only stays within [0..1] if the results are ordered
+ * by descending score - NOTE(review): this is assumed, confirm for the used Sites.
+ * <p>
+ * If at least one suggestion is part of the results, the confidence of all suggestions NOT part
+ * of the results is set to <code>c*cw</code>. If no suggestion is part of the results all
+ * suggestions are left unchanged.
+ *
+ * @param results
+ *            the results of the disambiguation request
+ * @param savedEntity
+ *            the saved entity to be disambiguated
+ **/
+ protected void disambiguateSuggestions(QueryResultList<Entity> results, SavedEntity savedEntity) {
+     // NOTE (rwesten) We should not score disambiguation results based on
+     // how well the labels match.
+     // Either use directly the scores of the disambiguation results OR
+     // do combine the confidence of the original suggestion with the
+     // scores of the disambiguation
+     Float maxSuggestedScore = null;
+     Iterator<Entity> guesses = results.iterator();
+     log.info("disambiguate {}: ", savedEntity.getName());
+     while (guesses.hasNext()) {
+         Entity guess = guesses.next();
+         Float score =
+                 guess.getRepresentation().getFirst(RdfResourceEnum.resultScore.getUri(), Float.class);
+         if (score == null) {
+             log.warn("Missing Score for Entityhub Query Result {}!", guess.getId());
+             continue;
+         }
+         Suggestion suggestion = savedEntity.getSuggestion(new UriRef(guess.getId()));
+         if (suggestion == null) {
+             log.info(" - not found {}", guess.getId());
+             continue;
+         }
+         if (maxSuggestedScore == null) {
+             maxSuggestedScore = score;
+         }
+         double c = suggestion.getOriginalConfidnece() == null ? 0 : suggestion.getOriginalConfidnece();
+         // TODO (rwesten) we need to find out if we should normalize based on the
+         // maximum score or the maximum score of an suggested one
+         // A reference score of zero would result in NaN/Infinity; use 0 in that case
+         double ns = maxSuggestedScore.floatValue() == 0f ? 0 : score / maxSuggestedScore;
+         suggestion.setNormalizedDisambiguationScore(ns);
+         double dc = c * confidenceWeight + ns * disambiguationWeight;
+         suggestion.setDisambiguatedConfidence(dc);
+         log.info(" - found {}, origConf:{}, disScore:{}, disConf:{}",
+             new Object[] {suggestion.getEntityUri(), c, ns, dc});
+     }
+     // if at least one suggestion was also in the disambiguation result
+     if (maxSuggestedScore != null) {
+         // adapt the confidence of suggestions that where not part of the
+         // disambiguation result
+         for (Suggestion suggestion : savedEntity.getSuggestions()) {
+             if (suggestion.getDisambiguatedConfidence() == null) {
+                 double c =
+                         suggestion.getOriginalConfidnece() == null ? 0 : suggestion
+                                 .getOriginalConfidnece();
+                 suggestion.setDisambiguatedConfidence(c * confidenceWeight);
+             }
+         }
+     } else { // else keep the original results
+         log.info(" - none found");
+     }
+ }
+
+ /*
+ * Checks whether the entities returned by the latest DBpedia query and the initially suggested entities
+ * have any element in common.
+ */
+ // NOTE (rwesten): now done as part of the disambiguateSuggestions(..)
+ // method.
+ // protected boolean intersectionCheck(List<Suggestion> matches,
+ // List<UriRef> subsumed,
+ // MGraph graph,
+ // String contentLangauge) {
+ // for (int i = 0; i < subsumed.size(); i++) {
+ // UriRef uri = subsumed.get(i);
+ //
+ // UriRef uri1 = EnhancementEngineHelper.getReference(graph, uri, new UriRef(NamespaceEnum.fise
+ // + "entity-reference"));
+ //
+ // String selectedText = EnhancementEngineHelper.getString(graph, uri, ENHANCER_ENTITY_LABEL);
+ //
+ // if (selectedText == null) {
+ // continue;
+ // }
+ //
+ // for (int j = 0; j < matches.size(); j++) {
+ // Suggestion suggestion = matches.get(j);
+ // String suggestName = suggestion.getURI();
+ // if (suggestName.compareToIgnoreCase(uri1.getUnicodeString()) == 0) return true;
+ // }
+ // }
+ // return false;
+ // }
+
+ // NOTE (rwesten): one MUST NOT store information of processed ContentItems
+ // as member variables, as one EnhancementEngine instance is
+ // concurrently used to process multiple ContentItems. Because
+ // of that member variables will have data of different
+ // ContentItems!
+ // All such data needs to be held in objects that are local
+ // to the processing of a single ContentItem (similar to
+ // SavedEntity).
+ // NOTE moved the DisambiguationData#directoryTextAnotation
+ // public Map<Integer,String> directoryTextAnotation = new HashMap<Integer,String>();
+
+ // TODO: make configureable
+ int radii = 23;
+
+ // Value to be configured
+
+ public boolean toInclude(int k, int s) {
+ if (Math.abs(k - s) < radii && Math.abs(k - s) > 0) {
+ return true;
+ }
+ return false;
+ }
+
+ /*
+  * TODO (rwesten): the intention of this method is unclear. Adding the fise:selection-context of all
+  * entities within a range of #radii characters does not seem to be a great way to build a context
+  * (or is something missed here?)
+  */
+ @Deprecated
+ // for now until someone can answer the above question
+ public List<String> EntitiesInRange(NavigableMap<Integer,SavedEntity> map, int radius) {
+     List<String> contexts = new ArrayList<String>();
+     // TODO: reimplement using subMap of the parsed NavigableMap map
+     for (Entry<Integer,SavedEntity> candidate : map.entrySet()) {
+         // read the context first, then test the key against the parsed position
+         String selectionContext = candidate.getValue().getContext();
+         Integer position = candidate.getKey();
+         if (!toInclude(position, radius)) {
+             continue;
+         }
+         contexts.add(selectionContext);
+     }
+     return contexts;
+ }
+
+ /**
+  * Returns a list of all fise:selected-text values occurring in the parsed context (excluding the
+  * parsed label if it is not <code>null</code>).
+  *
+  * @param label
+  *            The label of the current Entity. It is compared case insensitively and matching values
+  *            are excluded from the result. Parse <code>null</code> if no value should be excluded.
+  * @param allEntities
+  *            The collection with the fise:selected-text values of all fise:TextAnnotations.
+  *            <code>null</code> elements are ignored.
+  * @param context
+  *            the text that must contain a value for it to be included
+  * @return the values contained in the context, in the iteration order of the parsed collection. An
+  *         empty list if the context or the collection is <code>null</code>.
+  */
+ protected List<String> getSelectionsInContext(String label, Collection<String> allEntities, String context) {
+     List<String> selections = new ArrayList<String>();
+     if (context == null || allEntities == null) {
+         return selections;
+     }
+     for (String selectedText : allEntities) {
+         if (selectedText == null) {
+             continue;
+         }
+         // equalsIgnoreCase(null) is false, so a null label does not exclude anything
+         if (!selectedText.equalsIgnoreCase(label) && context.contains(selectedText)) {
+             selections.add(selectedText);
+         }
+     }
+     return selections;
+ }
+
+ /**
+  * Concatenates the string representations of all entries of the parsed collections. Every appended
+  * entry is followed by a single space (so a non-empty result ends with a space).
+  * <p>
+  * <code>null</code> collections and <code>null</code> entries are ignored.
+  *
+  * @param unique
+  *            if <code>true</code> every distinct string representation is only appended once
+  * @param lists
+  *            the collections to concatenate
+  * @return the concatenated entries or an empty string if there is nothing to append
+  */
+ public String unionString(boolean unique, Collection<?>... lists) {
+     StringBuilder union = new StringBuilder();
+     if (lists == null) {
+         return union.toString();
+     }
+     HashSet<String> added = new HashSet<String>();
+     for (Collection<?> list : lists) {
+         if (list == null) {
+             continue;
+         }
+         for (Object entry : list) {
+             if (entry == null) {
+                 continue; // neither fail nor append the text "null"
+             }
+             String text = entry.toString();
+             if (!unique || added.add(text)) {
+                 union.append(text);
+                 union.append(' ');
+             }
+         }
+     }
+     return union.toString();
+ }
+
+ /*
+ * Finds the values that lie in the intersection of both sets of disambiguations (the initially suggested
+ * ones and the ones returned by DBpedia). Updates the confidence values of those and sets the confidence
+ * values of all others to 0 in the gainConfidence list.
+ */
+ // NOTE (rwesten): intersection is calculated as part of the disambiguateSuggestions(..)
+ // method. Results are stored in the Suggestions (member of SavedEntiy) and
+ // than written back to the EnhancementStructure in a separate step
+ // protected List<Triple> intersection(List<Suggestion> matches,
+ // List<UriRef> subsumed,
+ // MGraph graph,
+ // List<Triple> gainConfidence,
+ // String contentLangauge) {
+ //
+ // for (int i = 0; i < subsumed.size(); i++) {
+ // boolean matchFound = false;
+ // UriRef uri = subsumed.get(i);
+ //
+ // UriRef uri1 = EnhancementEngineHelper.getReference(graph, uri, new UriRef(NamespaceEnum.fise
+ // + "entity-reference"));
+ //
+ // for (int j = 0; j < matches.size(); j++) {
+ // Suggestion suggestion = matches.get(j);
+ // String suggestName = suggestion.getURI();
+ //
+ // if (suggestName != null && uri1 != null
+ // && suggestName.compareToIgnoreCase(uri1.getUnicodeString()) == 0) {
+ // Triple confidenceTriple = new TripleImpl(uri, ENHANCER_CONFIDENCE, LiteralFactory
+ // .getInstance().createTypedLiteral(suggestion.getScore()));
+ // Triple contributorTriple = new TripleImpl((UriRef) confidenceTriple.getSubject(),
+ // new UriRef(NamespaceEnum.dc + "contributor"), LiteralFactory.getInstance()
+ // .createTypedLiteral(this.getClass().getName()));
+ // gainConfidence.add(confidenceTriple);
+ // gainConfidence.add(contributorTriple);
+ // matchFound = true;
+ // }
+ // }
+ //
+ // if (!matchFound) {
+ // Triple confidenceTriple = new TripleImpl(uri, ENHANCER_CONFIDENCE, LiteralFactory
+ // .getInstance().createTypedLiteral(0.0));
+ // Triple contributorTriple = new TripleImpl((UriRef) confidenceTriple.getSubject(), new UriRef(
+ // NamespaceEnum.dc + "contributor"), LiteralFactory.getInstance().createTypedLiteral(
+ // this.getClass().getName()));
+ // gainConfidence.add(confidenceTriple);
+ // gainConfidence.add(contributorTriple);
+ // }
+ // }
+ //
+ // return gainConfidence;
+ // }
+
+ /**
+  * Removes every triple of the parsed list from the parsed graph.
+  *
+  * @param graph
+  *            the graph to remove the triples from
+  * @param loseConfidence
+  *            the triples to remove
+  */
+ protected void removeOldConfidenceFromGraph(MGraph graph, List<Triple> loseConfidence) {
+     for (Triple obsolete : loseConfidence) {
+         graph.remove(obsolete);
+     }
+ }
+
+ /**
+  * Writes the disambiguated confidence values back to the enhancement structure.
+  * <p>
+  * Suggestions without a disambiguated confidence are left untouched. If the fise:EntityAnnotation of
+  * a suggestion is registered more than once in {@link DisambiguationData#suggestionMap} it is cloned
+  * (see {@link #cloneTextAnnotation(MGraph, UriRef, UriRef)}) before the confidence is changed.
+  *
+  * @param graph
+  *            the metadata of the {@link ContentItem}
+  * @param disData
+  *            the disambiguation data
+  */
+ protected void applyDisambiguationResults(MGraph graph, DisambiguationData disData) {
+     for (SavedEntity savedEntity : disData.textAnnotations.values()) {
+         for (Suggestion s : savedEntity.getSuggestions()) {
+             if (s.getDisambiguatedConfidence() == null) {
+                 continue; // not disambiguated -> nothing to write back
+             }
+             boolean shared = disData.suggestionMap.get(s.getEntityAnnotation()).size() > 1;
+             if (shared) {
+                 // already encountered AND disambiguated -> we need to clone!!
+                 log.info("clone {} suggesting {} for {}[{},{}]({})",
+                     new Object[] {s.getEntityAnnotation(), s.getEntityUri(), savedEntity.getName(),
+                                   savedEntity.getStart(), savedEntity.getEnd(), savedEntity.getUri()});
+                 UriRef clone = cloneTextAnnotation(graph, s.getEntityAnnotation(), savedEntity.getUri());
+                 s.setEntityAnnotation(clone);
+                 log.info(" - cloned {}", s.getEntityAnnotation());
+             }
+             // change the confidence
+             EnhancementEngineHelper.set(graph, s.getEntityAnnotation(), ENHANCER_CONFIDENCE,
+                 s.getDisambiguatedConfidence(), literalFactory);
+             EnhancementEngineHelper.addContributingEngine(graph, s.getEntityAnnotation(), this);
+         }
+     }
+ }
+
+ /**
+ * This creates a 'clone' of the fise:EntityAnnotation where the original does no longer have a
+ * dc:relation to the parsed fise:TextAnnotation and the created clone does only have a dc:relation to the
+ * parsed fise:TextAnnotation.
+ * <p>
+ * This is required by disambiguation because other engines typically only create a single
+ * fise:EntityAnnotation instance if several fise:TextAnnotation do have the same fise:selected-text
+ * values. So for a text that multiple times mentions the same Entity (e.g. "Paris") there will be
+ * multiple fise:TextAnnotations selecting the different mentions of that Entity, but there will be only a
+ * single set of suggestions - fise:EntityAnnotations (e.g. "Paris, France" and "Paris, Texas"). Now lets
+ * assume a text like
+ *
+ * <pre>
+ * Paris is the capital of France and it is worth a visit for sure. But
+ * one can also visit Paris without leaving the United States as there
+ * is also a city with the same name in Texas.
+ * </pre>
+ *
+ * Entity Disambiguation need to be able to have different fise:confidence values for the first and second
+ * mention of Paris and this is only possible of the fise:TextAnnotations of those mentions do NOT refer
+ * to the same set of fise:EntityAnnotations.
+ * <p>
+ * This methods accomplished exactly that as it
+ * <ul>
+ * <li>creates a clone of a fise:EntityAnnotation
+ * <li>removes the dc:relation link to the 2nd mention of Paris from the original
+ * <li>only adds the dc:relation of the end mention to the clone
+ * </ul>
+ * So in the end you will have two fise:EntityAnnotation
+ * <ul>
+ * <li>the original fise:EntityAnnotation with dc:relation to all fise:TextAnnotations other than the 2nd
+ * mention (the one this method was called for)
+ * <li>the cloned fise:EntityAnnnotation with a dc:relation to the 2nd mention.
+ * </ul>
+ *
+ * @param graph
+ * @param entityAnnotation
+ * @param textAnnotation
+ * @return
+ */
+ public static UriRef cloneTextAnnotation(MGraph graph, UriRef entityAnnotation, UriRef textAnnotation) {
+ UriRef copy = new UriRef("urn:enhancement-" + EnhancementEngineHelper.randomUUID());
+ Iterator<Triple> it = graph.filter(entityAnnotation, null, null);
+ // we can not add triples to the graph while iterating. So store them
+ // in a list and add later
+ List<Triple> added = new ArrayList<Triple>(32);
+ while (it.hasNext()) {
+ Triple triple = it.next();
+ if (DC_RELATION.equals(triple.getPredicate())) {
+ if (triple.getObject().equals(textAnnotation)) {
+ // remove the dc relation to the currently processed
+ // textAnnotation from the original
+ it.remove();
+ // and add it to the copy
+ added.add(new TripleImpl(copy, // use the copy as subject!
+ triple.getPredicate(), triple.getObject()));
+ } // else it is not the currently processed TextAnnotation
+ // so we need to keep in in the original and NOT add
+ // it to the copy
+ } else { // we can copy all other information 1:1
+ added.add(new TripleImpl(copy, // use the copy as subject!
+ triple.getPredicate(), triple.getObject()));
+ }
+ }
+ graph.addAll(added);
+ return copy;
+ }
+
+ /**
+  * Returns a string with all parsed text annotations that are contained in the context, excluding those
+  * that match the parsed label (case insensitive). Every appended value is preceded by a single space.
+  *
+  * @param label
+  *            the label to exclude; <code>null</code> to exclude nothing
+  * @param allEntities
+  *            the selected texts of the text annotations. <code>null</code> elements are ignored.
+  * @param context
+  *            the text that must contain a value for it to be appended
+  * @return the appended values or an empty string if the context or the list is <code>null</code> or no
+  *         value is contained in the context
+  */
+ protected String getEntitiesfromContext(String label, List<String> allEntities, String context) {
+     if (context == null || allEntities == null) {
+         return "";
+     }
+     StringBuilder found = new StringBuilder();
+     for (String entity : allEntities) {
+         // equalsIgnoreCase(null) is false, so a null label does not exclude anything
+         if (entity != null && !entity.equalsIgnoreCase(label) && context.contains(entity)) {
+             found.append(' ').append(entity);
+         }
+     }
+     return found.toString();
+ }
+
+ /**
+  * Extracts the sentence around the span <code>[a, b)</code> of the parsed text. Only the character
+  * <code>'.'</code> is considered as sentence delimiter.
+  *
+  * @param Context
+  *            the text
+  * @param a
+  *            the start offset of the span
+  * @param b
+  *            the end offset of the span
+  * @return the text after the last <code>'.'</code> in front of <code>a</code> (or from the start of
+  *         the text if there is none) up to, but excluding, the first <code>'.'</code> at or after
+  *         <code>b</code> (or to the end of the text if there is none)
+  */
+ protected String deriveSentence(String Context, int a, int b) {
+     // lastIndexOf returns -1 if there is no '.', so the sentence starts at 0 in that case.
+     // The delimiter itself is never part of the result.
+     int sentenceStart = Context.lastIndexOf('.', a - 1) + 1;
+     int sentenceEnd = Context.indexOf('.', b);
+     if (sentenceEnd < 0) {
+         sentenceEnd = Context.length();
+     }
+     return Context.substring(sentenceStart, sentenceEnd);
+ }
+
+ /**
+  * Extracts the selection context based on the content, selection and the start char offset of the
+  * selection.
+  * <p>
+  * The context is the window of <code>contextSize</code> characters before and after the selection.
+  * If the window does not reach the start/end of the content, words that are cut by the window are
+  * dropped, so that the context begins and ends at a word boundary. If there is no whitespace between
+  * the window border and the selection, the context begins/ends within a word.
+  *
+  * @param content
+  *            the content
+  * @param selection
+  *            the selected text
+  * @param selectionStartPos
+  *            the start char position of the selection
+  * @param contextSize
+  *            the size of the context in characters
+  * @return the context
+  */
+ public static String getDisambiguationContext(String content, String selection, int selectionStartPos,
+         int contextSize) {
+     int selectionEndPos = selectionStartPos + selection.length();
+     // extract the selection context
+     int beginPos;
+     if (selectionStartPos <= contextSize) {
+         beginPos = 0;
+     } else {
+         int start = selectionStartPos - contextSize;
+         // start one char earlier to detect a window that already begins at a word boundary
+         int pos = Math.max(0, start - 1);
+         // skip the (partial) word at the begin of the window ...
+         while (pos < selectionStartPos && !isContextWhitespace(content.charAt(pos))) {
+             pos++;
+         }
+         // ... and the whitespace following it
+         while (pos < selectionStartPos && isContextWhitespace(content.charAt(pos))) {
+             pos++;
+         }
+         // no words between the window border and the selection -> begin within a word
+         beginPos = pos < selectionStartPos ? pos : start;
+     }
+     int endPos;
+     if (selectionEndPos + contextSize >= content.length()) {
+         endPos = content.length();
+     } else {
+         int end = selectionEndPos + contextSize;
+         int pos = end; // exclusive end; end < content.length() is guaranteed here
+         // drop the (partial) word at the end of the window ...
+         while (pos > selectionEndPos && !isContextWhitespace(content.charAt(pos))) {
+             pos--;
+         }
+         // ... and the whitespace in front of it
+         while (pos > selectionEndPos && isContextWhitespace(content.charAt(pos - 1))) {
+             pos--;
+         }
+         // no words between the selection and the window border -> end within a word
+         endPos = pos > selectionEndPos ? pos : end;
+     }
+     return content.substring(beginPos, endPos);
+ }
+
+ /**
+  * Checks if the parsed char separates words: Java whitespace or a Unicode space separator.
+  */
+ private static boolean isContextWhitespace(char c) {
+     return Character.isWhitespace(c) || Character.getType(c) == Character.SPACE_SEPARATOR;
+ }
+
+ /**
+  * Activates this engine by delegating to the activation logic of the super class.
+  * <p>
+  * An {@link IOException} thrown by the super class is logged and NOT re-thrown, so the activation
+  * continues in that case.
+  *
+  * @param ce
+  *            the {@link ComponentContext}
+  * @throws ConfigurationException
+  *             presumably propagated from the activation of the super class (to be confirmed)
+  */
+ @Activate
+ protected void activate(ComponentContext ce) throws ConfigurationException {
+     try {
+         super.activate(ce);
+     } catch (IOException e) {
+         log.error("Failed to update the configuration", e);
+     }
+     // NOTE: no engine specific properties are read from the ComponentContext. Reading a configured
+     // service URL (FORMCEPT_SERVICE_URL) into #serviceURL is currently disabled.
+ }
+
+ /**
+ * Deactivates this engine by delegating to the deactivation logic of the super class.
+ *
+ * @param ce
+ * the {@link ComponentContext} passed on to <code>super.deactivate(..)</code>
+ */
+ @Deactivate
+ protected void deactivate(ComponentContext ce) {
+ super.deactivate(ce);
+ }
+
+ /**
+ * Getter for the Service URL
+ *
+ * @return the current value of the <code>serviceURL</code> field
+ */
+ public String getServiceURL() {
+ return serviceURL;
+ }
+
+ // private static double levenshtein(String s1, String s2) {
+ // if (s1 == null || s2 == null) {
+ // throw new IllegalArgumentException("NONE of the parsed String MUST BE NULL!");
+ // }
+ // s1 = StringUtils.trim(s1);
+ // s2 = StringUtils.trim(s2);
+ // return s1.isEmpty() || s2.isEmpty() ? 0
+ // : 1.0 - (((double) getLevenshteinDistance(s1, s2)) / ((double) (Math.max(s1.length(),
+ // s2.length()))));
+ // }
+
+}
\ No newline at end of file
Propchange: stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/DisambiguatorEngine.java
------------------------------------------------------------------------------
svn:executable = *
Propchange: stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/DisambiguatorEngine.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/SavedEntity.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/SavedEntity.java?rev=1447589&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/SavedEntity.java (added)
+++ stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/SavedEntity.java Tue Feb 19 05:14:18 2013
@@ -0,0 +1,251 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engine.disambiguation.mlt;
+
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_CONTEXT;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.TripleCollection;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
+import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
+import org.apache.stanbol.entityhub.servicesapi.site.Site;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+ /**
+  * The information about a fise:TextAnnotation needed for disambiguation: the selected text, its type,
+  * context and position, and the {@link Suggestion}s (fise:EntityAnnotations) linked to it.
+  * <p>
+  * Instances are created by {@link #createFromTextAnnotation(TripleCollection, UriRef)}. Two instances
+  * are equal if they were created for the same fise:TextAnnotation.
+  */
+ public final class SavedEntity {
+     private static final Logger log = LoggerFactory.getLogger(SavedEntity.class);
+
+     /**
+      * The {@link LiteralFactory} used to create typed RDF literals
+      */
+     private final static LiteralFactory literalFactory = LiteralFactory.getInstance();
+     private String name;
+     private UriRef type;
+     private UriRef uri;
+     private String context;
+     private Integer start;
+     private Integer end;
+
+     /**
+      * Map with the suggestions. The key is the URI of the suggested Entity and the value is the
+      * {@link Suggestion}. The iteration order is the order of the sorted suggestions.
+      */
+     private final Map<UriRef,Suggestion> suggestions = new LinkedHashMap<UriRef,Suggestion>();
+
+     /**
+      * The name of the Entityhub {@link Site} managing the suggestions of this fise:TextAnnotation
+      */
+     private String site;
+
+     /**
+      * private constructor only used by {@link #createFromTextAnnotation(TripleCollection, UriRef)}
+      */
+     private SavedEntity() {}
+
+     /**
+      * creates a SavedEntity instance for the parsed fise:TextAnnotation
+      *
+      * @param graph
+      *            the graph with the information
+      * @param textAnnotation
+      *            the fise:TextAnnotation
+      * @return the {@link SavedEntity} or <code>null</code> if the parsed text annotation is missing
+      *         required information: the selected text, the start/end position, at least one
+      *         suggestion, or a single Entityhub site managing all suggestions.
+      */
+     public static SavedEntity createFromTextAnnotation(TripleCollection graph, UriRef textAnnotation) {
+         SavedEntity entity = new SavedEntity();
+         entity.uri = textAnnotation;
+         entity.name = EnhancementEngineHelper.getString(graph, textAnnotation, ENHANCER_SELECTED_TEXT);
+         if (entity.name == null) {
+             log.debug("Unable to create SavedEntity for TextAnnotation {} "
+                       + "because property {} is not present", textAnnotation, ENHANCER_SELECTED_TEXT);
+             return null;
+         }
+         // NOTE rwesten: I think one should not change the selected text
+         // remove punctuation form the search string
+         // entity.name = cleanupKeywords(name);
+         if (entity.name.isEmpty()) {
+             log.debug("Unable to process TextAnnotation {} because its selects " + "an empty String !",
+                 textAnnotation);
+             return null;
+         }
+         entity.type = EnhancementEngineHelper.getReference(graph, textAnnotation, DC_TYPE);
+         // NOTE rwesten: TextAnnotations without dc:type should be still OK
+         // if (type == null) {
+         // log.warn("Unable to process TextAnnotation {} because property {}"
+         // + " is not present!",textAnnotation, DC_TYPE);
+         // return null;
+         // }
+         entity.context = EnhancementEngineHelper.getString(graph, textAnnotation, ENHANCER_SELECTION_CONTEXT);
+         Integer start =
+                 EnhancementEngineHelper.get(graph, textAnnotation, ENHANCER_START, Integer.class,
+                     literalFactory);
+         Integer end =
+                 EnhancementEngineHelper.get(graph, textAnnotation, ENHANCER_END, Integer.class,
+                     literalFactory);
+         if (start == null || end == null) {
+             log.debug("Unable to process TextAnnotation {} because the start and/or the end "
+                       + "position is not defined (selectedText: {}, start: {}, end: {})", new Object[] {
+                     textAnnotation, entity.name, start, end});
+             // getStart() and getEnd() return primitive values, so both positions are required
+             return null;
+         }
+         entity.start = start;
+         entity.end = end;
+
+         // parse the suggestions
+
+         // all the entityhubSites that manage a suggested Entity
+         // (hopefully only a single one)
+         Set<String> entityhubSites = new HashSet<String>();
+         List<Suggestion> suggestionList = new ArrayList<Suggestion>();
+         Iterator<Triple> suggestions = graph.filter(null, Properties.DC_RELATION, textAnnotation);
+         // NOTE: this iterator will also include dc:relation between fise:TextAnnotation's
+         // but in those cases NULL will be returned as suggestion
+         while (suggestions.hasNext()) {
+             NonLiteral subject = suggestions.next().getSubject();
+             if (!(subject instanceof UriRef)) {
+                 continue; // subjects that are not URIs can not be cast to an entity annotation
+             }
+             Suggestion suggestion = Suggestion.createFromEntityAnnotation(graph, (UriRef) subject);
+             if (suggestion != null) {
+                 suggestionList.add(suggestion);
+                 if (suggestion.getSite() != null) {
+                     entityhubSites.add(suggestion.getSite());
+                 }
+             }
+         }
+         if (suggestionList.isEmpty()) {
+             log.warn("TextAnnotation {} (selectedText: {}, start: {}) has no " + "suggestions.", new Object[] {
+                     entity.uri, entity.name, entity.start});
+             return null; // nothing to disambiguate
+         } else {
+             Collections.sort(suggestionList); // sort them based on confidence
+             // the LinkedHashMap will keep the order (based on the original
+             // confidence)
+             for (Suggestion suggestion : suggestionList) {
+                 entity.suggestions.put(suggestion.getEntityUri(), suggestion);
+             }
+         }
+         if (entityhubSites.isEmpty()) {
+             log.debug("TextAnnotation {} (selectedText: {}, start: {}) has "
+                       + "suggestions do not have 'entityhub:site' information. "
+                       + "Can not disambiguate because origin is unknown.", new Object[] {entity.uri,
+                     entity.name, entity.start});
+             return null; // Ignore TextAnnotations with suggestions of unknown origin.
+         } else if (entityhubSites.size() > 1) {
+             log.warn("TextAnnotation {} (selectedText: {}, start: {}) has "
+                      + "suggestions originating from multiple Entityhub Sites {}", new Object[] {entity.uri,
+                     entity.name, entity.start, entityhubSites});
+             return null; // TODO: Ignore those for now
+         } else {
+             entity.site = entityhubSites.iterator().next();
+         }
+         return entity;
+     }
+
+     /**
+      * Replaces every punctuation character of the parsed string with a space and trims the result.
+      */
+     private static String cleanupKeywords(String keywords) {
+         return keywords.replaceAll("\\p{P}", " ").trim();
+     }
+
+     /**
+      * Getter for the name
+      *
+      * @return the name (the fise:selected-text of the fise:TextAnnotation)
+      */
+     public final String getName() {
+         return name;
+     }
+
+     /**
+      * Getter for the type
+      *
+      * @return the type (the dc:type of the fise:TextAnnotation) or <code>null</code> if not present
+      */
+     public final UriRef getType() {
+         return type;
+     }
+
+     @Override
+     public int hashCode() {
+         return uri.hashCode();
+     }
+
+     @Override
+     public boolean equals(Object o) {
+         return o instanceof SavedEntity && uri.equals(((SavedEntity) o).uri);
+     }
+
+     @Override
+     public String toString() {
+         return String.format("SavedEntity %s (name=%s | type=%s)", uri, name, type);
+     }
+
+     /**
+      * Getter for the URI of the fise:TextAnnotation
+      *
+      * @return the URI
+      */
+     public UriRef getUri() {
+         return this.uri;
+     }
+
+     /**
+      * Getter for the fise:selection-context
+      *
+      * @return the context or <code>null</code> if not present
+      */
+     public String getContext() {
+         return this.context;
+     }
+
+     /**
+      * Getter for the start position of the selection
+      *
+      * @return the start position
+      */
+     public int getStart() {
+         return this.start;
+     }
+
+     /**
+      * Getter for the end position of the selection
+      *
+      * @return the end position
+      */
+     public int getEnd() {
+         return this.end;
+     }
+
+     /**
+      * Getter for the suggestions in the order they were sorted in when this instance was created
+      *
+      * @return the suggestions
+      */
+     public Collection<Suggestion> getSuggestions() {
+         return suggestions.values();
+     }
+
+     /**
+      * Getter for the suggestion of the Entity with the parsed URI
+      *
+      * @param uri
+      *            the URI of the suggested Entity
+      * @return the suggestion or <code>null</code> if the Entity is not suggested
+      */
+     public Suggestion getSuggestion(UriRef uri) {
+         return suggestions.get(uri);
+     }
+
+     /**
+      * The name of the Entityhub {@link Site} managing the suggestions
+      *
+      * @return the name of the site
+      */
+     public String getSite() {
+         return site;
+     }
+ }
Propchange: stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/SavedEntity.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/Suggestion.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/Suggestion.java?rev=1447589&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/Suggestion.java (added)
+++ stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/Suggestion.java Tue Feb 19 05:14:18 2013
@@ -0,0 +1,233 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engine.disambiguation.mlt;
+
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
+
+import java.util.SortedMap;
+import java.util.SortedSet;
+
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.TripleCollection;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.apache.stanbol.entityhub.servicesapi.model.Entity;
+import org.apache.stanbol.entityhub.servicesapi.model.Text;
+import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
+import org.apache.stanbol.entityhub.servicesapi.site.Site;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A suggestion of an {@link Entity} for a fise:TextAnnotation processed by the NamedEntityTaggingEngine.
+ * <p>
+ * Identity ({@link #equals(Object)} and {@link #hashCode()}) is defined solely by the
+ * {@link #getEntityUri() entity URI}. The natural ordering ({@link #compareTo(Suggestion)}) sorts by
+ * descending confidence and uses the entity URI as tie breaker, so it is consistent with equals.
+ */
+public class Suggestion implements Comparable<Suggestion> {
+
+    private static final Logger log = LoggerFactory.getLogger(Suggestion.class);
+
+    private static final LiteralFactory lf = LiteralFactory.getInstance();
+
+    private static final UriRef ENTITYHUB_SITE = new UriRef(RdfResourceEnum.site.getUri());
+
+    /** URI of the fise:EntityAnnotation; mutable because annotations may be cloned (see setter). */
+    private UriRef entityAnnotation;
+    private final UriRef entityUri;
+    // NOTE: the misspelling is kept because the public getter uses the same name
+    private final Double originalConfidnece;
+
+    private final Entity entity;
+    private Double normalizedDisambiguationScore;
+    private Double disambiguatedConfidence;
+    private final String site;
+
+    /**
+     * Internal constructor used for Suggestions that are created from an existing
+     * fise:EntityAnnotation. The {@link Entity} is not available for such Suggestions.
+     */
+    private Suggestion(UriRef entityAnnotation, UriRef entityUri, Double originalConfidence, String site) {
+        this.entityAnnotation = entityAnnotation;
+        this.entityUri = entityUri;
+        this.originalConfidnece = originalConfidence;
+        this.entity = null;
+        this.site = site;
+    }
+
+    /**
+     * Creates a Suggestion for the parsed {@link Entity}. The entity URI and the site are taken from the
+     * Entity. Such a Suggestion has neither a fise:EntityAnnotation nor an original confidence.
+     *
+     * @param entity
+     *            the entity (MUST NOT be <code>null</code>)
+     */
+    public Suggestion(Entity entity) {
+        this.entity = entity;
+        this.entityUri = new UriRef(entity.getId());
+        this.originalConfidnece = null;
+        this.site = entity.getSite();
+    }
+
+    /**
+     * Allows to create Suggestions from existing fise:EntityAnnotation contained in the metadata of the
+     * processed {@link ContentItem}
+     *
+     * @param graph
+     *            the graph holding the enhancement metadata
+     * @param entityAnnotation
+     *            the URI of the fise:EntityAnnotation
+     * @return the Suggestion or <code>null</code> if the parsed resource does not define a value for
+     *         {@link Properties#ENHANCER_ENTITY_REFERENCE}
+     */
+    public static Suggestion createFromEntityAnnotation(TripleCollection graph, UriRef entityAnnotation) {
+        UriRef entityUri =
+                EnhancementEngineHelper.getReference(graph, entityAnnotation, ENHANCER_ENTITY_REFERENCE);
+        if (entityUri == null) {
+            // most likely not a fise:EntityAnnotation
+            log.debug("Unable to create Suggestion for EntityAnnotation {} "
+                      + "because property {} is not present", entityAnnotation, ENHANCER_ENTITY_REFERENCE);
+            return null;
+        }
+        Double confidence =
+                EnhancementEngineHelper.get(graph, entityAnnotation, ENHANCER_CONFIDENCE, Double.class, lf);
+        if (confidence == null) {
+            log.warn("EntityAnnotation {} does not define a value for "
+                     + "property {}. Will use '0' as fallback", entityAnnotation, ENHANCER_CONFIDENCE);
+            confidence = Double.valueOf(0.0);
+        }
+        // NOTE: site might be NULL
+        String site = EnhancementEngineHelper.getString(graph, entityAnnotation, ENTITYHUB_SITE);
+        return new Suggestion(entityAnnotation, entityUri, confidence, site);
+    }
+
+    /**
+     * The URI of the fise:EntityAnnotation representing this suggestion in the
+     * {@link ContentItem#getMetadata() metadata} of the processed {@link ContentItem}. This will be
+     * <code>null</code> if this Suggestion was created as part of the Disambiguation process and was not
+     * present in the metadata of the content item before the disambiguation.
+     *
+     * @return the URI of the fise:EntityAnnotation or <code>null</code> if not present.
+     */
+    public UriRef getEntityAnnotation() {
+        return entityAnnotation;
+    }
+
+    /**
+     * Allows to set the URI of the fise:EntityAnnotation. This is required if the original enhancement
+     * structure shared one fise:EntityAnnotation instance for two fise:TextAnnotations (e.g. because both
+     * TextAnnotations had the exact same value for fise:selected-text). After disambiguation it is necessary
+     * to 'clone' fise:EntityAnnotations like that to give them different fise:confidence values. Because of
+     * that it is supported to set the new URI of the cloned fise:EntityAnnotation.
+     *
+     * @param uri
+     *            the uri of the cloned fise:EntityAnnotation
+     */
+    public void setEntityAnnotation(UriRef uri) {
+        this.entityAnnotation = uri;
+    }
+
+    /**
+     * The URI of the Entity (MUST NOT be <code>null</code>)
+     *
+     * @return the URI
+     */
+    public UriRef getEntityUri() {
+        return entityUri;
+    }
+
+    /**
+     * The original confidence of the fise:EntityAnnotation or <code>null</code> if not available.
+     * Suggestions created by {@link #Suggestion(Entity)} do not have an original confidence. Suggestions
+     * created by {@link #createFromEntityAnnotation(TripleCollection, UriRef)} use <code>0</code> if the
+     * fise:EntityAnnotation does not define a confidence.
+     *
+     * @return the original confidence or <code>null</code>
+     */
+    public Double getOriginalConfidnece() {
+        return originalConfidnece;
+    }
+
+    /**
+     * The {@link Entity} or <code>null</code> if not available. For Suggestions that are created based on
+     * fise:EntityAnnotations the Entity is not available. Entities might be loaded as part of the
+     * Disambiguation process.
+     *
+     * @return the {@link Entity} or <code>null</code> if not available
+     */
+    public Entity getEntity() {
+        return entity;
+    }
+
+    /**
+     * The score of the disambiguation. This is just the score of the disambiguation that is not yet combined
+     * with the {@link #getOriginalConfidnece()} to become the {@link #getDisambiguatedConfidence()}
+     *
+     * @return the disambiguation score
+     */
+    public Double getNormalizedDisambiguationScore() {
+        return normalizedDisambiguationScore;
+    }
+
+    /**
+     * The confidence after disambiguation. Will be <code>null</code> at the beginning
+     *
+     * @return the disambiguated confidence or <code>null</code> if not yet disambiguated
+     */
+    public Double getDisambiguatedConfidence() {
+        return disambiguatedConfidence;
+    }
+
+    /**
+     * The name of the Entityhub {@link Site} the suggested Entity is managed.
+     *
+     * @return the name of the Entityhub {@link Site} (might be <code>null</code>)
+     */
+    public String getSite() {
+        return site;
+    }
+
+    /**
+     * Setter for the normalized [0..1] score of the disambiguation
+     *
+     * @param normalizedDisambiguationScore
+     *            the normalized score
+     */
+    public void setNormalizedDisambiguationScore(Double normalizedDisambiguationScore) {
+        this.normalizedDisambiguationScore = normalizedDisambiguationScore;
+    }
+
+    /**
+     * Setter for the confidence after disambiguation
+     *
+     * @param disambiguatedConfidence
+     *            the confidence after disambiguation
+     */
+    public void setDisambiguatedConfidence(Double disambiguatedConfidence) {
+        this.disambiguatedConfidence = disambiguatedConfidence;
+    }
+
+    @Override
+    public int hashCode() {
+        return entityUri.hashCode();
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (obj == this) {
+            return true;
+        }
+        return obj instanceof Suggestion && ((Suggestion) obj).entityUri.equals(entityUri);
+    }
+
+    /**
+     * The confidence used for sorting: the {@link #getDisambiguatedConfidence()} if present, otherwise the
+     * {@link #getOriginalConfidnece()}.
+     *
+     * @return the confidence or <code>null</code> if neither value is available
+     */
+    private Double effectiveConfidence() {
+        return disambiguatedConfidence != null ? disambiguatedConfidence : originalConfidnece;
+    }
+
+    /**
+     * Sorts Suggestions with higher confidence first. For each Suggestion the
+     * {@link #getDisambiguatedConfidence()} is used if present, with the {@link #getOriginalConfidnece()}
+     * as fallback. Suggestions without any confidence are sorted after those that have one. If both
+     * Suggestions have the same confidence (or both have none) the natural order of the Entities URI is
+     * used. This also ensures <code>(x.compareTo(y)==0) == (x.equals(y))</code> and allows to use this
+     * class with {@link SortedMap} and {@link SortedSet} implementations.
+     * <p>
+     * The confidence is resolved per Suggestion (and not per pair of Suggestions) so that the ordering
+     * stays transitive even if only some of the compared Suggestions are already disambiguated.
+     */
+    @Override
+    public int compareTo(Suggestion other) {
+        Double own = effectiveConfidence();
+        Double theirs = other.effectiveConfidence();
+        int result;
+        if (own == null) {
+            // no confidence at all: sort after any Suggestion that has one
+            result = theirs == null ? 0 : 1;
+        } else if (theirs == null) {
+            result = -1;
+        } else {
+            // descending: the higher confidence comes first
+            result = theirs.compareTo(own);
+        }
+        // ensure (x.compareTo(y)==0) == (x.equals(y))
+        return result == 0 ? entityUri.getUnicodeString().compareTo(other.entityUri.getUnicodeString())
+                : result;
+    }
+
+}
Propchange: stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/Suggestion.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1447589&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/resources/OSGI-INF/metatype/metatype.properties (added)
+++ stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/resources/OSGI-INF/metatype/metatype.properties Tue Feb 19 05:14:18 2013
@@ -0,0 +1,29 @@
+#
+# Copyright 2012, FORMCEPT [http://www.formcept.com]
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+stanbol.enhancer.engine.name.name=Name
+stanbol.enhancer.engine.name.description=The name of the enhancement engine as \
+used in the RESTful interface '/engine/<name>'
+service.ranking.name=Ranking
+service.ranking.description=If two enhancement engines with the same name are active the \
+one with the higher ranking will be used to process parsed content items.
+
+#====================================================
+#Properties used to configure FORMCEPT Enhancer
+#====================================================
+
+org.formcept.engine.enhancer.url.name=FORMCEPT Service URL
+org.formcept.engine.enhancer.url.desc=The URL of the FORMCEPT Enhancement Service
Propchange: stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/resources/OSGI-INF/metatype/metatype.properties
------------------------------------------------------------------------------
svn:executable = *
Propchange: stanbol/trunk/enhancement-engines/disambiguation-mlt/src/main/resources/OSGI-INF/metatype/metatype.properties
------------------------------------------------------------------------------
svn:mime-type = text/plain