You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/08/31 11:39:58 UTC
svn commit: r1379385 [2/2] - in
/incubator/stanbol/branches/disambiguation-engine:
bundlelist/src/main/bundles/ defaults/src/main/resources/config/
engines/disambiguation-mlt/
engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/...
Modified: incubator/stanbol/branches/disambiguation-engine/engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/Suggestion.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/Suggestion.java?rev=1379385&r1=1379384&r2=1379385&view=diff
==============================================================================
--- incubator/stanbol/branches/disambiguation-engine/engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/Suggestion.java (original)
+++ incubator/stanbol/branches/disambiguation-engine/engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/Suggestion.java Fri Aug 31 09:39:57 2012
@@ -16,91 +16,216 @@
*/
package org.apache.stanbol.enhancer.engine.disambiguation.mlt;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
+
+import java.util.SortedMap;
+import java.util.SortedSet;
+
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.TripleCollection;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
import org.apache.stanbol.entityhub.servicesapi.model.Entity;
import org.apache.stanbol.entityhub.servicesapi.model.Text;
+import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
+import org.apache.stanbol.entityhub.servicesapi.site.Site;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* A suggestion of an {@link Entity} for a fise:TextAnnotation processed
* by the NamedEntityTaggingEngine
- * @author Rupert Westenthaler
*/
public class Suggestion implements Comparable<Suggestion>{
- private final Entity entity;
- private double levenshtein = -1;
- private Double score;
- private Text matchedLabel;
- private String URI;
- protected Suggestion(Entity entity){
- this.entity = entity;
+
+ private static final Logger log = LoggerFactory.getLogger(Suggestion.class);
+
+ private static final LiteralFactory lf = LiteralFactory.getInstance();
+
+ private static final UriRef ENTITYHUB_SITE = new UriRef(RdfResourceEnum.site.getUri());
+
+ private UriRef entityAnnotation;
+ private UriRef entityUri;
+ private Double originalConfidnece;
+
+ private Entity entity;
+ private Double normalizedDisambiguationScore;
+ private Double disambiguatedConfidence;
+ private String site;
+
+
+ private Suggestion(UriRef entityAnnotation){
+ this.entityAnnotation = entityAnnotation;
}
+ public Suggestion(Entity entity){
+ this.entity = entity;
+ this.entityUri = new UriRef(entity.getId());
+ this.site = entity.getSite();
+ }
/**
- * @return the levenshtein
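+ * Creates a Suggestion from an existing fise:EntityAnnotation contained
+ * in the metadata of the processed {@link ContentItem}.
+ * @param graph the graph containing the fise:EntityAnnotation
+ * @param entityAnnotation the URI of the fise:EntityAnnotation
+ * @return the Suggestion, or <code>null</code> if the annotation has no value for {@link Properties#ENHANCER_ENTITY_REFERENCE}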
*/
- public final double getLevenshtein() {
- return levenshtein;
+ public static Suggestion createFromEntityAnnotation(TripleCollection graph, UriRef entityAnnotation){
+ Suggestion suggestion = new Suggestion(entityAnnotation);
+ suggestion.entityUri = EnhancementEngineHelper.getReference(
+ graph, entityAnnotation, ENHANCER_ENTITY_REFERENCE);
+ if(suggestion.entityUri == null){
+ //most likely not a fise:EntityAnnotation
+ log.debug("Unable to create Suggestion for EntityAnnotation {} "
+ + "because property {} is not present", entityAnnotation, ENHANCER_ENTITY_REFERENCE);
+ return null;
+ }
+ suggestion.originalConfidnece = EnhancementEngineHelper.get(
+ graph, entityAnnotation, ENHANCER_CONFIDENCE, Double.class, lf);
+ if(suggestion.originalConfidnece == null){
+ log.warn("EntityAnnotation {} does not define a value for "
+ + "property {}. Will use '0' as fallback", entityAnnotation, ENHANCER_CONFIDENCE);
+ suggestion.originalConfidnece = 0.0;
+ }
+ suggestion.site = EnhancementEngineHelper.getString(graph, entityAnnotation, ENTITYHUB_SITE);
+ //NOTE: site might be NULL
+ return suggestion;
}
-
/**
- * @param levenshtein the levenshtein to set
+ * The URI of the fise:EntityAnnotation representing this suggestion in the
+ * {@link ContentItem#getMetadata() metadata} of the processed
+ * {@link ContentItem}. This will be <code>null</code> if this Suggestion
+ * was created as part of the Disambiguation process and was not present
+ * in the metadata of the content item before the disambiguation.
+ * @return the URI of the fise:EntityAnnotation or <code>null</code> if
+ * not present.
*/
- protected final void setLevenshtein(double levenshtein) {
- this.levenshtein = levenshtein;
+ public UriRef getEntityAnnotation() {
+ return entityAnnotation;
}
-
-
/**
- * @return the score
+ * Allows to set the URI of the fise:EntityAnnotation. This is required
+ * if the original enhancement structure shared one fise:EntityAnnotation
+ * instance for two fise:TextAnnotations (e.g. because both TextAnnotations
+ * had the exact same value for fise:selected-text). After
+ * disambiguation it is necessary to 'clone' fise:EntityAnnotations like
+ * that to give them different fise:confidence values. Because of that
+ * it is supported to set the new URI of the cloned fise:EntityAnnotation.
+ * @param uri the uri of the cloned fise:EntityAnnotation
*/
- public final Double getScore() {
- return score;
+ public void setEntityAnnotation(UriRef uri) {
+ this.entityAnnotation = uri;
}
-
/**
- * @param score the score to set
+ * The URI of the Entity (MUST NOT be <code>null</code>)
+ * @return the URI
*/
- protected final void setScore(Double score) {
- this.score = score;
+ public UriRef getEntityUri() {
+ return entityUri;
}
-
/**
- * @return the matchedLabel
+ * The original confidence of the fise:EntityAnnotation or <code>null</code>
+ * if not available.
+ * @return the original confidence or <code>null</code> if not available
*/
- public final Text getMatchedLabel() {
- return matchedLabel;
+ public Double getOriginalConfidnece() {
+ return originalConfidnece;
}
-
/**
- * @param matchedLabel the matchedLabel to set
+ * The {@link Entity} or <code>null</code> if not available. For
+ * Suggestions that are created based on fise:EntityAnnotations the Entity
+ * is not available. Entities might be loaded as part of the
+ * Disambiguation process.
+ * @return the {@link Entity} or <code>null</code> if not available
+ */
+ public Entity getEntity() {
+ return entity;
+ }
+
+ /**
+ * The score of the disambiguation. This is just the score of the
+ * disambiguation that is not yet combined with the
+ * {@link #getOriginalConfidnece()} to become the
+ * {@link #getDisambiguatedConfidence()}
+ * @return the disambiguation score
*/
- protected final void setMatchedLabel(Text matchedLabel) {
- this.matchedLabel = matchedLabel;
+ public Double getNormalizedDisambiguationScore() {
+ return normalizedDisambiguationScore;
}
- protected final String getURI() {
- return this.URI;
+ /**
+ * The confidence after disambiguation. Will be <code>null</code> at the
+ * beginning
+ * @return the disambiguated confidence or <code>null</code> if not yet
+ * disambiguated
+ */
+ public Double getDisambiguatedConfidence() {
+ return disambiguatedConfidence;
}
- protected final void setURI(String URI) {
- this.URI = URI;
+ /**
+ * The name of the Entityhub {@link Site} that manages the suggested
+ * Entity. Might be <code>null</code> if the site is not known.
+ * @return the name of the Entityhub {@link Site}
+ */
+ public String getSite() {
+ return site;
}
-
/**
- * @return the entity
+ * Setter for the normalized [0..1] score of the disambiguation
+ * @param normalizedDisambiguationScore
*/
- public final Entity getEntity() {
- return entity;
+ public void setNormalizedDisambiguationScore(Double normalizedDisambiguationScore) {
+ this.normalizedDisambiguationScore = normalizedDisambiguationScore;
+ }
+ /**
+ * Setter for the confidence after disambiguation
+ * @param disambiguatedConfidence
+ */
+ public void setDisambiguatedConfidence(Double disambiguatedConfidence) {
+ this.disambiguatedConfidence = disambiguatedConfidence;
}
-
@Override
- public int compareTo(Suggestion other) {
- return other.score.compareTo(score);
+ public int hashCode() {
+ return entityUri.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ return obj instanceof Suggestion && ((Suggestion)obj).entityUri.equals(entityUri);
}
+ /**
+ * Compares based on the {@link #getDisambiguatedConfidence()} (if present
+ * on both Suggestions) and falls back to the {@link #getOriginalConfidnece()}.
+ * Higher confidence values sort first. If no confidence values can be compared
+ * or both are equal, the natural order of the Entity URIs is used. This
+ * also ensures <code>(x.compareTo(y)==0) == (x.equals(y))</code> and
+ * allows this class to be used with {@link SortedMap} and {@link SortedSet}
+ * implementations.
+ */
+ @Override
+ public int compareTo(Suggestion other) {
+ int result;
+ if(disambiguatedConfidence != null && other.disambiguatedConfidence != null){
+ result = other.disambiguatedConfidence.compareTo(disambiguatedConfidence);
+ } else if(other.originalConfidnece != null && originalConfidnece != null){
+ result = other.originalConfidnece.compareTo(originalConfidnece);
+ } else {
+ result = 0;
+ }
+ //ensure (x.compareTo(y)==0) == (x.equals(y))
+ return result == 0 ? entityUri.getUnicodeString().compareTo(
+ other.entityUri.getUnicodeString()) : result;
+ }
}
Modified: incubator/stanbol/branches/disambiguation-engine/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java?rev=1379385&r1=1379384&r2=1379385&view=diff
==============================================================================
--- incubator/stanbol/branches/disambiguation-engine/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java (original)
+++ incubator/stanbol/branches/disambiguation-engine/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java Fri Aug 31 09:39:57 2012
@@ -533,7 +533,7 @@ public class NamedEntityTaggingEngine ex
// and labels in the same language as the content
(language != null && label.getLanguage().startsWith(language))) {
double actMatch = levenshtein(
- casesensitive ? label.getText().toLowerCase() : label.getText(), namedEntityLabel);
+ casesensitive ? label.getText() : label.getText().toLowerCase(), namedEntityLabel);
if (actMatch > match.getLevenshtein()) {
match.setLevenshtein(actMatch);
match.setMatchedLabel(label);
Modified: incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java?rev=1379385&r1=1379384&r2=1379385&view=diff
==============================================================================
--- incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java (original)
+++ incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java Fri Aug 31 09:39:57 2012
@@ -741,7 +741,7 @@ public class KeywordLinkingEngine
if(minSearchTokenLength < 1){
throw new ConfigurationException(MIN_SEARCH_TOKEN_LENGTH, "Values MUST be valid Integer values > 0");
}
- linkerConfig.setMaxSuggestions(minSearchTokenLength);
+ linkerConfig.setMinSearchTokenLength(minSearchTokenLength);
}
//init the REDIRECT_PROCESSING_MODE
value = configuration.get(REDIRECT_PROCESSING_MODE);
Modified: incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/EntityLinker.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/EntityLinker.java?rev=1379385&r1=1379384&r2=1379385&view=diff
==============================================================================
--- incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/EntityLinker.java (original)
+++ incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/EntityLinker.java Fri Aug 31 09:39:57 2012
@@ -410,7 +410,9 @@ public class EntityLinker {
//ensure the correct order of the tokens in the suggested entity
boolean search = true;
int firstFoundIndex = -1;
+ int firstProcessableFoundIndex = -1;
int lastFoundIndex = -1;
+ int lastProcessableFoundIndex = -1;
int firstFoundLabelIndex = -1;
int lastfoundLabelIndex = -1;
Token currentToken;
@@ -462,6 +464,10 @@ public class EntityLinker {
if(found){ //found
if(isProcessable){
foundProcessableTokens++; //only count processable Tokens
+ if(firstProcessableFoundIndex < 0){
+ firstProcessableFoundIndex = currentIndex;
+ }
+ lastProcessableFoundIndex = currentIndex;
}
foundTokens++;
foundTokenMatch = foundTokenMatch + matchFactor; //sum up the matches
@@ -512,6 +518,7 @@ public class EntityLinker {
if(found){ //found
if(isProcessable){
foundProcessableTokens++; //only count processable Tokens
+ firstProcessableFoundIndex = currentIndex;
}
foundTokens++;
foundTokenMatch = foundTokenMatch + matchFactor; //sum up the matches
@@ -533,6 +540,7 @@ public class EntityLinker {
//e.g. if given and family name of persons are switched
MATCH labelMatch;
int coveredTokens = lastFoundIndex-firstFoundIndex+1;
+ int coveredProcessableTokens = lastProcessableFoundIndex-firstProcessableFoundIndex+1;
float labelMatchScore = (foundTokenMatch/(float)labelTokens.length);
//Matching rules
// - if less than config#minTokenFound() than accept only EXACT
@@ -552,8 +560,8 @@ public class EntityLinker {
// Tokens are found, but if all Tokens of the Label are
// matched! (STANBOL-622)
//foundTokens == coveredTokens) &&
- foundTokens >= labelTokens.length) &&
- labelMatchScore >= 0.6f){
+ foundTokens >= labelTokens.length)){ //&&
+ //labelMatchScore >= 0.6f){
//same as above
//if(foundTokens == coveredTokens){
if(foundTokens == labelTokens.length && foundTokens == coveredTokens){
@@ -568,7 +576,9 @@ public class EntityLinker {
if(match.getMatchCount() < foundProcessableTokens ||
match.getMatchCount() == foundProcessableTokens &&
labelMatch.ordinal() > match.getMatch().ordinal()){
- match.updateMatch(labelMatch, firstFoundIndex, coveredTokens, foundTokens,
+// match.updateMatch(labelMatch, firstFoundIndex, coveredTokens, foundTokens,
+// foundTokenMatch/foundTokens,label,labelTokens.length);
+ match.updateMatch(labelMatch, firstProcessableFoundIndex, coveredProcessableTokens, foundProcessableTokens,
foundTokenMatch/foundTokens,label,labelTokens.length);
} //else this match is not better as the existing one
} //else ignore labels with MATCH.NONE
Modified: incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java?rev=1379385&r1=1379384&r2=1379385&view=diff
==============================================================================
--- incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java (original)
+++ incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java Fri Aug 31 09:39:57 2012
@@ -16,11 +16,13 @@
*/
package org.apache.stanbol.enhancer.servicesapi.helper;
+import static java.util.Collections.singleton;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
@@ -35,6 +37,7 @@ import org.apache.clerezza.rdf.core.Lite
import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Resource;
import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.TripleCollection;
import org.apache.clerezza.rdf.core.TypedLiteral;
@@ -205,6 +208,23 @@ public class EnhancementEngineHelper {
return enhancement;
}
/**
+ * Adds the parsed {@link EnhancementEngine} as dc:contributor to the
+ * enhancement and sets the dc:modified property to the current date.
+ * @param metadata the {@link ContentItem#getMetadata()}
+ * @param enhancement the enhancement
+ * @param engine the engine
+ */
+ public static void addContributingEngine(MGraph metadata, UriRef enhancement,
+ EnhancementEngine engine){
+ LiteralFactory literalFactory = LiteralFactory.getInstance();
+ // TODO: use a public dereferencing URI instead?
+ metadata.add(new TripleImpl(enhancement, Properties.DC_CONTRIBUTOR,
+ literalFactory.createTypedLiteral(engine.getClass().getName())));
+ //set the modification date to the current date.
+ set(metadata,enhancement,Properties.DC_MODIFIED,new Date(),literalFactory);
+ }
+
+ /**
* Create a new extraction instance in the metadata-graph of the content
* item along with default properties (dc:creator and dc:created) and return
* the UriRef of the extraction so that engines can further add
@@ -288,6 +308,76 @@ public class EnhancementEngineHelper {
}
}
/**
+ * Replaces all current values of the property for the resource
+ * with the parsed value
+ * @param graph the graph
+ * @param resource the resource
+ * @param property the property
+ * @param value the value
+ */
+ public static void set(MGraph graph, NonLiteral resource, UriRef property, Resource value){
+ set(graph,resource,property,value == null ? null : singleton(value),null);
+ }
+ /**
+ * Replaces all current values of the property for the resource
+ * with the parsed values
+ * @param graph the graph
+ * @param resource the resource
+ * @param property the property
+ * @param values the values
+ */
+ public static void set(MGraph graph, NonLiteral resource, UriRef property, Collection<Resource> values){
+ set(graph,resource,property,values,null);
+ }
+
+ /**
+ * Replaces all current values of the property for the resource
+ * with the parsed value
+ * @param graph the graph
+ * @param resource the resource
+ * @param property the property
+ * @param value the value. In case it is an instance of {@link Resource} it
+ * is directly added to the graph. Otherwise the parsed {@link LiteralFactory}
+ * is used to create a {@link TypedLiteral} for the parsed value.
+ * @param literalFactory the {@link LiteralFactory} used in case the parsed
+ * value is not a {@link Resource}
+ */
+ public static void set(MGraph graph, NonLiteral resource, UriRef property,
+ Object value, LiteralFactory literalFactory){
+ set(graph,resource,property,value == null ? null : singleton(value),literalFactory);
+ }
+ /**
+ * Replaces all current values of the property for the resource
+ * with the parsed values
+ * @param graph the graph
+ * @param resource the resource
+ * @param property the property
+ * @param values the values. A value that is an instance of {@link Resource}
+ * is directly added to the graph. Otherwise the parsed {@link LiteralFactory}
+ * is used to create a {@link TypedLiteral} for it. <code>null</code> values are ignored.
+ * @param literalFactory the {@link LiteralFactory} used in case a parsed
+ * value is not a {@link Resource}
+ */
+ public static void set(MGraph graph, NonLiteral resource, UriRef property,
+ Collection<?> values, LiteralFactory literalFactory){
+ Iterator<Triple> currentValues = graph.filter(resource, property, null);
+ while(currentValues.hasNext()){
+ currentValues.next();
+ currentValues.remove();
+ }
+ if(values != null){
+ for(Object value : values){
+ if(value instanceof Resource){
+ graph.add(new TripleImpl(resource, property, (Resource) value));
+ } else if (value != null){
+ graph.add(new TripleImpl(resource, property,
+ literalFactory.createTypedLiteral(value)));
+ }
+ }
+ }
+ }
+
+ /**
* Getter for the typed literal values of the property for a resource
* @param <T> the java class the literal value needs to be converted to.
* Note that the parsed LiteralFactory needs to support this conversion
Modified: incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/Properties.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/Properties.java?rev=1379385&r1=1379384&r2=1379385&view=diff
==============================================================================
--- incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/Properties.java (original)
+++ incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/Properties.java Fri Aug 31 09:39:57 2012
@@ -61,11 +61,27 @@ public class Properties {
+ "created");
/**
+ * Modification date of a resource. Used by Stanbol Enhancer to annotate the
+ * modification date of an enhancement if it was changed by an enhancement
+ * engine other than the one that created it. Multiple changes by the
+ * creating enhancement engine are not considered modifications.
+ */
+ public static final UriRef DC_MODIFIED = new UriRef(NamespaceEnum.dc
+ + "modified");
+
+ /**
* The entity responsible for the creation of a resource. Used by Stanbol Enhancer to
* annotate the enhancement engine that created an enhancement
*/
public static final UriRef DC_CREATOR = new UriRef(NamespaceEnum.dc
+ "creator");
+ /**
+ * An entity that contributed to a resource. Used by Stanbol Enhancer to
+ * annotate the enhancement engine that changed an enhancement originally
+ * created by another enhancement engine.
+ */
+ public static final UriRef DC_CONTRIBUTOR = new UriRef(NamespaceEnum.dc
+ + "contributor");
/**
* The nature or genre of the resource. Stanbol Enhancer uses this property to refer to
Modified: incubator/stanbol/branches/disambiguation-engine/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java?rev=1379385&r1=1379384&r2=1379385&view=diff
==============================================================================
--- incubator/stanbol/branches/disambiguation-engine/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java (original)
+++ incubator/stanbol/branches/disambiguation-engine/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java Fri Aug 31 09:39:57 2012
@@ -28,8 +28,10 @@ import static org.apache.stanbol.enhance
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_LABEL;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.GEO_LAT;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.GEO_LONG;
import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_ENTITYANNOTATION;
@@ -93,6 +95,7 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.helper.execution.Execution;
import org.apache.stanbol.enhancer.servicesapi.rdf.ExecutionMetadata;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.CONFIDENCE_LEVEL_ENUM;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -136,8 +139,8 @@ public class ContentItemResource extends
* {@link Properties#ENHANCER_SELECTED_TEXT}.
* This map is initialised by {@link #initOccurrences()}.
*/
- protected Map<UriRef,Map<String,EntityExtractionSummary>> extractionsByTypeMap =
- new HashMap<UriRef,Map<String,EntityExtractionSummary>>();
+ protected Map<UriRef,Map<EntityExtractionSummary,EntityExtractionSummary>> extractionsByTypeMap =
+ new HashMap<UriRef,Map<EntityExtractionSummary,EntityExtractionSummary>>();
private MGraph executionMetadata;
@@ -247,7 +250,7 @@ public class ContentItemResource extends
* Checks if there are Occurrences
*/
public boolean hasOccurrences(){
- for(Map<String,EntityExtractionSummary> occ : extractionsByTypeMap.values()){
+ for(Map<EntityExtractionSummary,EntityExtractionSummary> occ : extractionsByTypeMap.values()){
if(!occ.isEmpty()){
return true;
}
@@ -278,7 +281,7 @@ public class ContentItemResource extends
}
}
public Collection<EntityExtractionSummary> getOccurrences(UriRef type){
- Map<String,EntityExtractionSummary> typeMap = extractionsByTypeMap.get(type);
+ Map<EntityExtractionSummary,EntityExtractionSummary> typeMap = extractionsByTypeMap.get(type);
Collection<EntityExtractionSummary> typeOccurrences;
if(typeMap != null){
typeOccurrences = typeMap.values();
@@ -343,31 +346,33 @@ public class ContentItemResource extends
Iterator<Triple> textAnnotations = graph.filter(null, RDF.type, ENHANCER_TEXTANNOTATION);
while(textAnnotations.hasNext()){
NonLiteral textAnnotation = textAnnotations.next().getSubject();
- if (graph.filter(textAnnotation, DC_RELATION, null).hasNext()) {
- // this is not the most specific occurrence of this name: skip
- continue;
- }
+ //if (graph.filter(textAnnotation, DC_RELATION, null).hasNext()) {
+ // // this is not the most specific occurrence of this name: skip
+ // continue;
+ //}
String text = getString(graph, textAnnotation, Properties.ENHANCER_SELECTED_TEXT);
if(text == null){
//ignore text annotations without text
continue;
}
+ Integer start = EnhancementEngineHelper.get(graph,textAnnotation,
+ ENHANCER_START,Integer.class,lf);
+ Integer end = EnhancementEngineHelper.get(graph,textAnnotation,
+ ENHANCER_END,Integer.class,lf);
+ Double confidence = EnhancementEngineHelper.get(graph, textAnnotation,
+ ENHANCER_CONFIDENCE, Double.class, lf);
Iterator<UriRef> types = getReferences(graph, textAnnotation, DC_TYPE);
if(!types.hasNext()){ //create an iterator over null in case no types are present
types = Collections.singleton((UriRef)null).iterator();
}
while(types.hasNext()){
UriRef type = types.next();
- Map<String,EntityExtractionSummary> occurrenceMap = extractionsByTypeMap.get(type);
+ Map<EntityExtractionSummary,EntityExtractionSummary> occurrenceMap = extractionsByTypeMap.get(type);
if(occurrenceMap == null){
- occurrenceMap = new TreeMap<String,EntityExtractionSummary>(String.CASE_INSENSITIVE_ORDER);
+ occurrenceMap = new TreeMap<EntityExtractionSummary,EntityExtractionSummary>();
extractionsByTypeMap.put(type, occurrenceMap);
}
- EntityExtractionSummary entity = occurrenceMap.get(text);
- if(entity == null){
- entity = new EntityExtractionSummary(text, type, defaultThumbnails);
- occurrenceMap.put(text, entity);
- }
+ EntityExtractionSummary entity = new EntityExtractionSummary(text, type, start,end,confidence,defaultThumbnails);
Collection<NonLiteral> suggestions = suggestionMap.get(textAnnotation);
if(suggestions != null){
for(NonLiteral entityAnnotation : suggestions){
@@ -379,10 +384,106 @@ public class ContentItemResource extends
graph);
}
}
+ EntityExtractionSummary existingSummary = occurrenceMap.get(entity);
+ if(existingSummary == null){//new extraction summary
+ occurrenceMap.put(entity, entity);
+ } else {
+ //extraction summary with this text and suggestions already
+ //present ... only add a mention to the existing
+ existingSummary.addMention(new Mention(text, start, end, confidence));
+ }
}
}
}
-
+ /**
+ * Mentions of {@link EntityExtractionSummary EntityExtractionSummaries}.
+ * @author Rupert Westenthaler
+ *
+ */
+ public static class Mention implements Comparable<Mention>{
+ private String name;
+ private Integer start;
+ private Integer end;
+ private Double conf;
+
+ Mention(String name,Integer start, Integer end, Double confidence){
+ if(name == null){
+ throw new IllegalStateException("The name for a Mention MUST NOT be NULL!");
+ }
+ this.name = name;
+ this.start = start;
+ this.end = end;
+ this.conf = confidence;
+ }
+
+ public String getName() {
+ return name;
+ }
+ public Integer getStart() {
+ return start;
+ }
+ public Integer getEnd() {
+ return end;
+ }
+ public Double getConfidence() {
+ return conf;
+ }
+ public boolean hasOccurrence() {
+ return start != null && end != null;
+ }
+ public boolean hasConfidence(){
+ return conf != null;
+ }
+ @Override
+ public int hashCode() {
+ return name.hashCode() +
+ (start != null ? start.hashCode() : 0) +
+ (end != null ? end.hashCode() : 0);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if(obj instanceof Mention){
+ Mention o = (Mention)obj;
+ if(o.name.equals(name)){
+ if((o.start != null && o.start.equals(start)) ||
+ (o.start == null && start == null)){
+ if(o.end != null && o.end.equals(end)){
+ return true;
+ } else {
+ return o.end == null && end == null;
+ }
+ }
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public int compareTo(Mention o) {
+ int c = String.CASE_INSENSITIVE_ORDER.compare(o.name, this.name);
+ if(c == 0){
+ if(start != null && o.start != null){
+ c = start.compareTo(o.start);
+ } else if(o.start != null){
+ c = 1;
+ } else if(start != null){
+ c = -1;
+ }
+ if(c == 0){
+ if(o.end != null && end != null){
+ c = end.compareTo(o.end);
+ } else if(o.end != null){
+ c = -1;
+ } else if(end != null){
+ c = 1;
+ }
+ }
+ }
+ return c;
+ }
+ }
+
public ChainExecution getChainExecution(){
return chainExecution;
}
@@ -439,29 +540,49 @@ public class ContentItemResource extends
protected final String name;
+
protected final UriRef type;
protected List<EntitySuggestion> suggestions = new ArrayList<EntitySuggestion>();
+ protected Set<UriRef> suggestionSet = new HashSet<UriRef>();
- protected List<String> mentions = new ArrayList<String>();
+ protected List<Mention> mentions = new ArrayList<Mention>();
public final Map<UriRef,String> defaultThumbnails;
- public EntityExtractionSummary(String name, UriRef type, Map<UriRef,String> defaultThumbnails) {
+
+ private Integer start;
+
+ private Integer end;
+
+
+ private Double confidence;
+
+ public EntityExtractionSummary(String name, UriRef type, Integer start, Integer end, Double confidence, Map<UriRef,String> defaultThumbnails) {
this.name = name;
this.type = type;
- mentions.add(name);
+ mentions.add(new Mention(name, start, end, confidence)); // initial mention; the Mention constructor rejects a null name
this.defaultThumbnails = defaultThumbnails;
+ this.start = start; // start, end and confidence of the initial mention are also kept on the summary itself
+ this.end = end;
+ this.confidence = confidence;
}
public void addSuggestion(UriRef uri, String label, Double confidence, TripleCollection properties) {
EntitySuggestion suggestion = new EntitySuggestion(uri, type, label, confidence, properties,
defaultThumbnails);
+ suggestionSet.add(uri); // URIs are tracked separately; compareTo(..) and equals(..) compare this set
if (!suggestions.contains(suggestion)) { // ignore suggestions that are already in the list
suggestions.add(suggestion);
Collections.sort(suggestions); // keep the list sorted after every addition
}
}
+ /**
+ * Adds the parsed mention to this summary. Mentions already contained in
+ * the list are ignored; after an addition the list is sorted again.
+ * @param mention the mention to add
+ */
+ public void addMention(Mention mention){
+ if(mentions.contains(mention)){
+ return; //already known
+ }
+ mentions.add(mention);
+ Collections.sort(mentions);
+ }
public String getName() {
EntitySuggestion bestGuess = getBestGuess();
@@ -470,7 +591,9 @@ public class ContentItemResource extends
}
return name;
}
-
+ public String getSelected(){ // returns the name field directly, without consulting getBestGuess()
+ return name;
+ }
public String getUri() {
EntitySuggestion bestGuess = getBestGuess();
if (bestGuess != null) {
@@ -478,6 +601,13 @@ public class ContentItemResource extends
}
return null;
}
+ /**
+ * The confidence of the best guess or, if there is no best guess, the
+ * confidence this summary was created with.
+ * @return the confidence (may be <code>null</code> if none was parsed)
+ */
+ public Double getConfidence(){
+ final EntitySuggestion best = getBestGuess();
+ if (best == null) {
+ return confidence;
+ }
+ return best.getConfidence();
+ }
public String getSummary() {
if (suggestions.isEmpty()) {
@@ -485,7 +615,15 @@ public class ContentItemResource extends
}
return suggestions.get(0).getSummary();
}
-
+ public Integer getStart() { // start as parsed to the constructor; may be null
+ return start;
+ }
+ public Integer getEnd() { // end as parsed to the constructor; may be null
+ return end;
+ }
+ /**
+ * @return <code>true</code> only if both the start and the end are set
+ */
+ public boolean hasOccurrence(){
+ return !(start == null || end == null);
+ }
public String getThumbnailSrc() {
if (suggestions.isEmpty()) {
return getMissingThumbnailSrc();
@@ -507,18 +645,41 @@ public class ContentItemResource extends
}
return suggestions.get(0);
}
-
+
public List<EntitySuggestion> getSuggestions() { // returns the internal list (not a copy)
return suggestions;
}
- public List<String> getMentions() {
+ public List<Mention> getMentions() { // returns the internal list (not a copy)
return mentions;
}
@Override
public int compareTo(EntityExtractionSummary o) {
- return getName().compareTo(o.getName());
+ int c = String.CASE_INSENSITIVE_ORDER.compare(getName(),o.getName());
+ if(c == 0){
+ if(suggestionSet.equals(o.suggestionSet)){
+ return 0; //assume as equals if name and suggestionSet is the same
+ } else { //sort by mention
+ if(start != null && o.start != null){
+ c = start.compareTo(o.start);
+ } else if(o.start != null){
+ c = 1;
+ } else if(start != null){
+ c = -1;
+ }
+ if(c == 0){
+ if(o.end != null && end != null){
+ c = end.compareTo(o.end);
+ } else if(o.end != null){
+ c = -1;
+ } else if(end != null){
+ c = 1;
+ }
+ }
+ }
+ }
+ return c;
}
@Override
@@ -529,10 +690,14 @@ public class ContentItemResource extends
if (o == null || getClass() != o.getClass()) {
return false;
}
-
EntityExtractionSummary that = (EntityExtractionSummary) o;
-
- return !(name != null ? !name.equals(that.name) : that.name != null);
+ //if name and suggestions are the same ... consider as equals
+ if(getName().equalsIgnoreCase(getName())){
+ return suggestionSet.equals(that.suggestionSet);
+ } else {
+ return false;
+ }
+ //return !(name != null ? !name.equals(that.name) : that.name != null);
}
@Override
Modified: incubator/stanbol/branches/disambiguation-engine/jersey/src/main/resources/org/apache/stanbol/enhancer/jersey/templates/imports/entities.ftl
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/jersey/src/main/resources/org/apache/stanbol/enhancer/jersey/templates/imports/entities.ftl?rev=1379385&r1=1379384&r2=1379385&view=diff
==============================================================================
--- incubator/stanbol/branches/disambiguation-engine/jersey/src/main/resources/org/apache/stanbol/enhancer/jersey/templates/imports/entities.ftl (original)
+++ incubator/stanbol/branches/disambiguation-engine/jersey/src/main/resources/org/apache/stanbol/enhancer/jersey/templates/imports/entities.ftl Fri Aug 31 09:39:57 2012
@@ -31,6 +31,12 @@
<#else>
${entity.name}
</#if>
+ <br><span class="metadata">
+ <#if entity.name != entity.selected>for:'${entity.selected}',</#if>
+ <#-- NOTE: a plain '>' would end the directive tag; use 'gt' for the comparison -->
+ <#if entity.mentions?size gt 1>${entity.mentions?size} mentions,
+ <#elseif entity.hasOccurrence()>pos:[${entity.start},${entity.end}],</#if>
+ <#-- the confidence is optional: only render it if present -->
+ <#if entity.confidence??>conf:${entity.confidence?string("0.##")}</#if></span>
</th>
</tr>
</thead>
@@ -44,7 +50,9 @@
<tr>
<td class="thumb"><img src="${suggestion.thumbnailSrc}"
onerror="$(this).attr('src', '${suggestion.missingThumbnailSrc}');" alt="${suggestion.label}" /></td>
- <td><a href="${suggestion.uri}" title="${suggestion.summary}" class="external">${suggestion.label}</a></td>
+ <td><a href="${suggestion.uri}" title="${suggestion.summary}" class="external">
+ ${suggestion.label}</a><br>
+ <span class="metadata">conf:${suggestion.confidence?string("0.##")}</span></td>
</tr>
</#list>
<#if entity.mentions?size != 0>
@@ -55,7 +63,12 @@
<#list entity.mentions as mention>
<tr>
<td></td>
- <td>${mention}</td>
+ <td>${mention.name}<br><span class="metadata">
+ <#-- only emit the separator if both position and confidence are shown -->
+ <#if mention.hasOccurrence()>
+ pos:[${mention.start},${mention.end}]<#if mention.hasConfidence()>,</#if>
+ </#if>
+ <#if mention.hasConfidence()>
+ conf:${mention.confidence?string("0.##")}</#if></span></td>
</tr>
</#list>
</tbody>