You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/08/31 11:39:58 UTC

svn commit: r1379385 [2/2] - in /incubator/stanbol/branches/disambiguation-engine: bundlelist/src/main/bundles/ defaults/src/main/resources/config/ engines/disambiguation-mlt/ engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/...

Modified: incubator/stanbol/branches/disambiguation-engine/engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/Suggestion.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/Suggestion.java?rev=1379385&r1=1379384&r2=1379385&view=diff
==============================================================================
--- incubator/stanbol/branches/disambiguation-engine/engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/Suggestion.java (original)
+++ incubator/stanbol/branches/disambiguation-engine/engines/disambiguation-mlt/src/main/java/org/apache/stanbol/enhancer/engine/disambiguation/mlt/Suggestion.java Fri Aug 31 09:39:57 2012
@@ -16,91 +16,216 @@
  */
 package org.apache.stanbol.enhancer.engine.disambiguation.mlt;
 
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
+
+import java.util.SortedMap;
+import java.util.SortedSet;
+
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.TripleCollection;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
 import org.apache.stanbol.entityhub.servicesapi.model.Entity;
 import org.apache.stanbol.entityhub.servicesapi.model.Text;
+import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
+import org.apache.stanbol.entityhub.servicesapi.site.Site;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * A suggestion of an {@link Entity} for a fise:TextAnnotation processed
  * by the NamedEntityTaggingEngine
- * @author Rupert Westenthaler
  */
 public class Suggestion implements Comparable<Suggestion>{
-    private final Entity entity;
-    private double levenshtein = -1;
-    private Double score;
-    private Text matchedLabel;
-    private String URI;
-    protected Suggestion(Entity entity){
-        this.entity = entity;
+    
+    private static final Logger log = LoggerFactory.getLogger(Suggestion.class);
+    
+    private static final LiteralFactory lf = LiteralFactory.getInstance();
+    
+    private static final UriRef ENTITYHUB_SITE = new UriRef(RdfResourceEnum.site.getUri());
+
+    private UriRef entityAnnotation;
+    private UriRef entityUri;
+    private Double originalConfidnece;
+    
+    private Entity entity;
+    private Double normalizedDisambiguationScore;
+    private Double disambiguatedConfidence;
+    private String site;
+
+    
+    private Suggestion(UriRef entityAnnotation){
+        this.entityAnnotation = entityAnnotation;
     }
     
+    public Suggestion(Entity entity){
+        this.entity = entity;
+        this.entityUri = new UriRef(entity.getId());
+        this.site = entity.getSite();
+    }
     
     /**
-     * @return the levenshtein
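+     * Creates a Suggestion from an existing fise:EntityAnnotation contained
+     * in the metadata of the processed {@link ContentItem}.
+     * @param graph the graph containing the fise:EntityAnnotation
+     * @param entityAnnotation the URI of the fise:EntityAnnotation
+     * @return the Suggestion or <code>null</code> if no entity reference is present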
      */
-    public final double getLevenshtein() {
-        return levenshtein;
+    public static Suggestion createFromEntityAnnotation(TripleCollection graph, UriRef entityAnnotation){
+        Suggestion suggestion = new Suggestion(entityAnnotation);
+        suggestion.entityUri = EnhancementEngineHelper.getReference(
+            graph, entityAnnotation, ENHANCER_ENTITY_REFERENCE);
+        if(suggestion.entityUri == null){
+            //most likely not a fise:EntityAnnotation
+            log.debug("Unable to create Suggestion for EntityAnnotation {} "
+                    + "because property {} is not present", entityAnnotation, ENHANCER_ENTITY_REFERENCE);
+            return null;
+        }
+        suggestion.originalConfidnece = EnhancementEngineHelper.get(
+            graph, entityAnnotation, ENHANCER_CONFIDENCE, Double.class, lf);
+        if(suggestion.originalConfidnece == null){
+            log.warn("EntityAnnotation {} does not define a value for "
+                    + "property {}. Will use '0' as fallback", entityAnnotation, ENHANCER_CONFIDENCE);
+            suggestion.originalConfidnece = 0.0;
+        }
+        suggestion.site = EnhancementEngineHelper.getString(graph, entityAnnotation, ENTITYHUB_SITE);
+        //NOTE: site might be NULL
+        return suggestion;
     }
 
-
     /**
-     * @param levenshtein the levenshtein to set
+     * The URI of the fise:EntityAnnotation representing this suggestion in the
+     * {@link ContentItem#getMetadata() metadata} of the processed 
+     * {@link ContentItem}. This will be <code>null</code> if this Suggestion
+     * was created as part of the Disambiguation process and was not present
+     * in the metadata of the content item before the disambiguation.
+     * @return the URI of the fise:EntityAnnotation or <code>null</code> if
+     * not present.
      */
-    protected final void setLevenshtein(double levenshtein) {
-        this.levenshtein = levenshtein;
+    public UriRef getEntityAnnotation() {
+        return entityAnnotation;
     }
-
-
     /**
-     * @return the score
+     * Allows to set the URI of the fise:EntityAnnotation. This is required
+     * if the original enhancement structure shared one fise:EntityAnnotation
+     * instance for two fise:TextAnnotations (e.g. because both TextAnnotations
+     * had the exact same value for fise:selected-text). After
+     * disambiguation it is necessary to 'clone' fise:EntityAnnotations like
+     * that to give them different fise:confidence values. Because of that
+     * it is supported to set the new URI of the cloned fise:EntityAnnotation.
+     * @param uri the uri of the cloned fise:EntityAnnotation
      */
-    public final Double getScore() {
-        return score;
+    public void setEntityAnnotation(UriRef uri) {
+        this.entityAnnotation = uri;
     }
 
-
     /**
-     * @param score the score to set
+     * The URI of the Entity (MUST NOT be <code>null</code>)
+     * @return the URI
      */
-    protected final void setScore(Double score) {
-        this.score = score;
+    public UriRef getEntityUri() {
+        return entityUri;
     }
 
-
     /**
-     * @return the matchedLabel
+     * The original confidence of the fise:EntityAnnotation or <code>null</code>
+     * if not available.
+     * @return
      */
-    public final Text getMatchedLabel() {
-        return matchedLabel;
+    public Double getOriginalConfidnece() {
+        return originalConfidnece;
     }
 
-
     /**
-     * @param matchedLabel the matchedLabel to set
+     * The {@link Entity} or <code>null</code> if not available. For
+     * Suggestions that are created based on fise:EntityAnnotations the Entity
+     * is not available. Entities might be loaded as part of the
+     * Disambiguation process.
+     * @return the {@link Entity} or <code>null</code> if not available
+     */
+    public Entity getEntity() {
+        return entity;
+    }
+    
+    /**
+     * The score of the disambiguation. This is just the score of the
+     * disambiguation that is not yet combined with the
+     * {@link #getOriginalConfidnece()} to become the
+     * {@link #getDisambiguatedConfidence()}
+     * @return the disambiguation score
      */
-    protected final void setMatchedLabel(Text matchedLabel) {
-        this.matchedLabel = matchedLabel;
+    public Double getNormalizedDisambiguationScore() {
+        return normalizedDisambiguationScore;
     }
 
-    protected final String getURI() {
-        return this.URI;
+    /**
+     * The confidence after disambiguation. Will be <code>null</code> at the
+     * beginning
+     * @return the disambiguated confidence or <code>null</code> if not yet
+     * disambiguated
+     */
+    public Double getDisambiguatedConfidence() {
+        return disambiguatedConfidence;
     }
-    protected final void setURI(String URI) {
-        this.URI = URI;
+    /**
+     * The name of the Entityhub {@link Site} that manages the suggested
+     * Entity.
+     * @return the name of the Entityhub {@link Site}
+     */
+    public String getSite() {
+        return site;
     }
-
     /**
-     * @return the entity
+     * Setter for the normalized [0..1] score of the disambiguation
+     * @param normalizedDisambiguationScore
      */
-    public final Entity getEntity() {
-        return entity;
+    public void setNormalizedDisambiguationScore(Double normalizedDisambiguationScore) {
+        this.normalizedDisambiguationScore = normalizedDisambiguationScore;
+    }
+    /**
+     * Setter for the confidence after disambiguation
+     * @param disambiguatedConfidence
+     */
+    public void setDisambiguatedConfidence(Double disambiguatedConfidence) {
+        this.disambiguatedConfidence = disambiguatedConfidence;
     }
-
 
     @Override
-    public int compareTo(Suggestion other) {
-        return other.score.compareTo(score);
+    public int hashCode() {
+        return entityUri.hashCode();
+    }
+    
+    @Override
+    public boolean equals(Object obj) {
+        return obj instanceof Suggestion && ((Suggestion)obj).entityUri.equals(entityUri);
     }
     
+    /**
+     * Compares based on the {@link #getDisambiguatedConfidence()} (if present)
+     * and falls back to the {@link #getOriginalConfidnece()}. If the
+     * original confidence value is not present or both Suggestions have the
+     * same confidence, the natural order of the Entity URIs is used. This
+     * also ensures <code>(x.compareTo(y)==0) == (x.equals(y))</code> and
+     * allows to use this class with {@link SortedMap} and {@link SortedSet}
+     * implementations.<p>
+     */
+    @Override
+    public int compareTo(Suggestion other) {
+        int result;
+        if(disambiguatedConfidence != null && other.disambiguatedConfidence != null){
+            result = other.disambiguatedConfidence.compareTo(disambiguatedConfidence);
+        } else if(other.originalConfidnece != null && originalConfidnece != null){
+            result = other.originalConfidnece.compareTo(originalConfidnece);
+        } else {
+            result = 0;
+        }
+        //ensure (x.compareTo(y)==0) == (x.equals(y))
+        return result == 0 ? entityUri.getUnicodeString().compareTo(
+            other.entityUri.getUnicodeString()) : result; 
+    }
     
 }

Modified: incubator/stanbol/branches/disambiguation-engine/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java?rev=1379385&r1=1379384&r2=1379385&view=diff
==============================================================================
--- incubator/stanbol/branches/disambiguation-engine/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java (original)
+++ incubator/stanbol/branches/disambiguation-engine/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java Fri Aug 31 09:39:57 2012
@@ -533,7 +533,7 @@ public class NamedEntityTaggingEngine ex
                     // and labels in the same language as the content
                     (language != null && label.getLanguage().startsWith(language))) {
                     double actMatch = levenshtein(
-                        casesensitive ? label.getText().toLowerCase() : label.getText(), namedEntityLabel);
+                        casesensitive ? label.getText() : label.getText().toLowerCase(), namedEntityLabel);
                     if (actMatch > match.getLevenshtein()) {
                         match.setLevenshtein(actMatch);
                         match.setMatchedLabel(label);

Modified: incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java?rev=1379385&r1=1379384&r2=1379385&view=diff
==============================================================================
--- incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java (original)
+++ incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java Fri Aug 31 09:39:57 2012
@@ -741,7 +741,7 @@ public class KeywordLinkingEngine 
             if(minSearchTokenLength < 1){
                 throw new ConfigurationException(MIN_SEARCH_TOKEN_LENGTH, "Values MUST be valid Integer values > 0");
             }
-            linkerConfig.setMaxSuggestions(minSearchTokenLength);
+            linkerConfig.setMinSearchTokenLength(minSearchTokenLength);
         }
         //init the REDIRECT_PROCESSING_MODE
         value = configuration.get(REDIRECT_PROCESSING_MODE);

Modified: incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/EntityLinker.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/EntityLinker.java?rev=1379385&r1=1379384&r2=1379385&view=diff
==============================================================================
--- incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/EntityLinker.java (original)
+++ incubator/stanbol/branches/disambiguation-engine/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/EntityLinker.java Fri Aug 31 09:39:57 2012
@@ -410,7 +410,9 @@ public class EntityLinker {
         //ensure the correct order of the tokens in the suggested entity
         boolean search = true;
         int firstFoundIndex = -1;
+        int firstProcessableFoundIndex = -1;
         int lastFoundIndex = -1;
+        int lastProcessableFoundIndex = -1;
         int firstFoundLabelIndex = -1;
         int lastfoundLabelIndex = -1;
         Token currentToken;
@@ -462,6 +464,10 @@ public class EntityLinker {
                 if(found){ //found
                     if(isProcessable){
                         foundProcessableTokens++; //only count processable Tokens
+                        if(firstProcessableFoundIndex < 0){
+                            firstProcessableFoundIndex = currentIndex;
+                        }
+                        lastProcessableFoundIndex = currentIndex;
                     }
                     foundTokens++;
                     foundTokenMatch = foundTokenMatch + matchFactor; //sum up the matches
@@ -512,6 +518,7 @@ public class EntityLinker {
                 if(found){ //found
                     if(isProcessable){
                         foundProcessableTokens++; //only count processable Tokens
+                        firstProcessableFoundIndex = currentIndex;
                     }
                     foundTokens++;
                     foundTokenMatch = foundTokenMatch + matchFactor; //sum up the matches
@@ -533,6 +540,7 @@ public class EntityLinker {
         //e.g. if given and family name of persons are switched
         MATCH labelMatch; 
         int coveredTokens = lastFoundIndex-firstFoundIndex+1;
+        int coveredProcessableTokens = lastProcessableFoundIndex-firstProcessableFoundIndex+1;
         float labelMatchScore = (foundTokenMatch/(float)labelTokens.length);
         //Matching rules
         // - if less than config#minTokenFound() than accept only EXACT
@@ -552,8 +560,8 @@ public class EntityLinker {
                     //  Tokens are found, but if all Tokens of the Label are
                     //  matched! (STANBOL-622)
                     //foundTokens == coveredTokens) && 
-                    foundTokens >= labelTokens.length) &&
-                    labelMatchScore >= 0.6f){
+                    foundTokens >= labelTokens.length)){ //&&
+                    //labelMatchScore >= 0.6f){
                 //same as above
                 //if(foundTokens == coveredTokens){
                 if(foundTokens == labelTokens.length && foundTokens == coveredTokens){
@@ -568,7 +576,9 @@ public class EntityLinker {
                 if(match.getMatchCount() < foundProcessableTokens ||
                         match.getMatchCount() == foundProcessableTokens && 
                         labelMatch.ordinal() > match.getMatch().ordinal()){
-                    match.updateMatch(labelMatch, firstFoundIndex, coveredTokens, foundTokens,
+//                    match.updateMatch(labelMatch, firstFoundIndex, coveredTokens, foundTokens,
+//                        foundTokenMatch/foundTokens,label,labelTokens.length);
+                    match.updateMatch(labelMatch, firstProcessableFoundIndex, coveredProcessableTokens, foundProcessableTokens,
                         foundTokenMatch/foundTokens,label,labelTokens.length);
                 } //else this match is not better as the existing one
             } //else ignore labels with MATCH.NONE

Modified: incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java?rev=1379385&r1=1379384&r2=1379385&view=diff
==============================================================================
--- incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java (original)
+++ incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java Fri Aug 31 09:39:57 2012
@@ -16,11 +16,13 @@
 */
 package org.apache.stanbol.enhancer.servicesapi.helper;
 
+import static java.util.Collections.singleton;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
 
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.Date;
@@ -35,6 +37,7 @@ import org.apache.clerezza.rdf.core.Lite
 import org.apache.clerezza.rdf.core.LiteralFactory;
 import org.apache.clerezza.rdf.core.MGraph;
 import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Resource;
 import org.apache.clerezza.rdf.core.Triple;
 import org.apache.clerezza.rdf.core.TripleCollection;
 import org.apache.clerezza.rdf.core.TypedLiteral;
@@ -205,6 +208,23 @@ public class EnhancementEngineHelper {
         return enhancement;
     }
     /**
+     * Adds the parsed {@link EnhancementEngine} as dc:contributor to the
+     * enhancement and also sets the dc:modified property accordingly
+     * @param metadata the {@link ContentItem#getMetadata()}
+     * @param enhancement the enhancement
+     * @param engine the engine
+     */
+    public static void addContributingEngine(MGraph metadata, UriRef enhancement,
+                                             EnhancementEngine engine){
+        LiteralFactory literalFactory = LiteralFactory.getInstance();
+        // TODO: use a public dereferencing URI instead?
+        metadata.add(new TripleImpl(enhancement, Properties.DC_CONTRIBUTOR,
+            literalFactory.createTypedLiteral(engine.getClass().getName())));
+        //set the modification date to the current date.
+        set(metadata,enhancement,Properties.DC_MODIFIED,new Date(),literalFactory);
+    }
+    
+    /**
      * Create a new extraction instance in the metadata-graph of the content
      * item along with default properties (dc:creator and dc:created) and return
      * the UriRef of the extraction so that engines can further add
@@ -288,6 +308,76 @@ public class EnhancementEngineHelper {
         }
     }
     /**
+     * Replaces all current values of the property for the resource
+     * with the parsed value
+     * @param graph the graph
+     * @param resource the resource
+     * @param property the property
+     * @param value the value
+     */
+    public static void set(MGraph graph, NonLiteral resource, UriRef property, Resource value){
+        set(graph,resource,property,value == null ? null : singleton(value),null);
+    }
+    /**
+     * Replaces all current values of the property for the resource
+     * with the parsed values
+     * @param graph the graph
+     * @param resource the resource
+     * @param property the property
+     * @param values the values
+     */
+    public static void set(MGraph graph, NonLiteral resource, UriRef property, Collection<Resource> values){
+        set(graph,resource,property,values,null);
+    }
+
+    /**
+     * Replaces all current values of the property for the resource
+     * with the parsed value
+     * @param graph the graph
+     * @param resource the resource
+     * @param property the property
+     * @param value the value. In case it is an instance of {@link Resource} it
+     * is directly added to the graph. Otherwise the parsed {@link LiteralFactory}
+     * is used to create a {@link TypedLiteral} for the parsed value.
+     * @param literalFactory the {@link LiteralFactory} used in case the parsed
+     * value is not an {@link Resource}
+     */
+    public static void set(MGraph graph, NonLiteral resource, UriRef property,
+                           Object value, LiteralFactory literalFactory){
+        set(graph,resource,property,value == null ? null : singleton(value),literalFactory);
+    }
+    /**
+     * Replaces all current values of the property for the resource
+     * with the parsed values
+     * @param graph the graph
+     * @param resource the resource
+     * @param property the property
+     * @param values the values. Values that are instances of {@link Resource}
+     * are directly added to the graph. For all other values the parsed
+     * {@link LiteralFactory} is used to create a {@link TypedLiteral}.
+     * @param literalFactory the {@link LiteralFactory} used in case the parsed
+     * value is not an {@link Resource}
+     */
+    public static void set(MGraph graph, NonLiteral resource, UriRef property,
+                               Collection<?> values, LiteralFactory literalFactory){
+        Iterator<Triple> currentValues = graph.filter(resource, property, null);
+        while(currentValues.hasNext()){
+            currentValues.next();
+            currentValues.remove();
+        }
+        if(values != null){
+            for(Object value : values){
+                if(value instanceof Resource){
+                    graph.add(new TripleImpl(resource, property, (Resource) value));
+                } else if (value != null){
+                    graph.add(new TripleImpl(resource, property, 
+                        literalFactory.createTypedLiteral(value)));
+                }
+            }
+        }
+    }
+    
+    /**
      * Getter for the typed literal values of the property for a resource
      * @param <T> the java class the literal value needs to be converted to.
      * Note that the parsed LiteralFactory needs to support this conversion

Modified: incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/Properties.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/Properties.java?rev=1379385&r1=1379384&r2=1379385&view=diff
==============================================================================
--- incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/Properties.java (original)
+++ incubator/stanbol/branches/disambiguation-engine/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/Properties.java Fri Aug 31 09:39:57 2012
@@ -61,11 +61,27 @@ public class Properties {
             + "created");
 
     /**
+     * Modification date of a resource. Used by Stanbol Enhancer to annotate the
+     * modification date of the enhancement if it was changed by an enhancement
+     * engine other than the one that created it. Multiple changes by the
+     * creating enhancement engine are not considered modifications.
+     */
+    public static final UriRef DC_MODIFIED = new UriRef(NamespaceEnum.dc
+            + "modified");
+
+    /**
      * The entity responsible for the creation of a resource. Used by Stanbol Enhancer to
      * annotate the enhancement engine that created an enhancement
      */
     public static final UriRef DC_CREATOR = new UriRef(NamespaceEnum.dc
             + "creator");
+    /**
+     * An entity that contributed to a resource. Used by Stanbol Enhancer to
+     * annotate the enhancement engine that changed an enhancement originally
+     * created by another enhancement engine.
+     */
+    public static final UriRef DC_CONTRIBUTOR = new UriRef(NamespaceEnum.dc
+            + "contributor");
 
     /**
      * The nature or genre of the resource. Stanbol Enhancer uses this property to refer to

Modified: incubator/stanbol/branches/disambiguation-engine/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java?rev=1379385&r1=1379384&r2=1379385&view=diff
==============================================================================
--- incubator/stanbol/branches/disambiguation-engine/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java (original)
+++ incubator/stanbol/branches/disambiguation-engine/jersey/src/main/java/org/apache/stanbol/enhancer/jersey/resource/ContentItemResource.java Fri Aug 31 09:39:57 2012
@@ -28,8 +28,10 @@ import static org.apache.stanbol.enhance
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_LABEL;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.GEO_LAT;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.GEO_LONG;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_ENTITYANNOTATION;
@@ -93,6 +95,7 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.helper.execution.Execution;
 import org.apache.stanbol.enhancer.servicesapi.rdf.ExecutionMetadata;
 import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.CONFIDENCE_LEVEL_ENUM;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -136,8 +139,8 @@ public class ContentItemResource extends
      * {@link Properties#ENHANCER_SELECTED_TEXT}.
      * This map is initialised by {@link #initOccurrences()}.
      */
-    protected Map<UriRef,Map<String,EntityExtractionSummary>> extractionsByTypeMap = 
-        new HashMap<UriRef,Map<String,EntityExtractionSummary>>();
+    protected Map<UriRef,Map<EntityExtractionSummary,EntityExtractionSummary>> extractionsByTypeMap = 
+        new HashMap<UriRef,Map<EntityExtractionSummary,EntityExtractionSummary>>();
 
     private MGraph executionMetadata;
 
@@ -247,7 +250,7 @@ public class ContentItemResource extends
      * Checks if there are Occurrences
      */
     public boolean hasOccurrences(){
-        for(Map<String,EntityExtractionSummary> occ : extractionsByTypeMap.values()){
+        for(Map<EntityExtractionSummary,EntityExtractionSummary> occ : extractionsByTypeMap.values()){
             if(!occ.isEmpty()){
                 return true;
             }
@@ -278,7 +281,7 @@ public class ContentItemResource extends
         }
     }
     public Collection<EntityExtractionSummary> getOccurrences(UriRef type){
-        Map<String,EntityExtractionSummary> typeMap = extractionsByTypeMap.get(type);
+        Map<EntityExtractionSummary,EntityExtractionSummary> typeMap = extractionsByTypeMap.get(type);
         Collection<EntityExtractionSummary> typeOccurrences;
         if(typeMap != null){
             typeOccurrences = typeMap.values();
@@ -343,31 +346,33 @@ public class ContentItemResource extends
         Iterator<Triple> textAnnotations = graph.filter(null, RDF.type, ENHANCER_TEXTANNOTATION);
         while(textAnnotations.hasNext()){
             NonLiteral textAnnotation = textAnnotations.next().getSubject();
-            if (graph.filter(textAnnotation, DC_RELATION, null).hasNext()) {
-                // this is not the most specific occurrence of this name: skip
-                continue;
-            }
+            //if (graph.filter(textAnnotation, DC_RELATION, null).hasNext()) {
+            //    // this is not the most specific occurrence of this name: skip
+            //    continue;
+            //}
             String text = getString(graph, textAnnotation, Properties.ENHANCER_SELECTED_TEXT);
             if(text == null){
                 //ignore text annotations without text
                 continue;
             }
+            Integer start = EnhancementEngineHelper.get(graph,textAnnotation, 
+                ENHANCER_START,Integer.class,lf);
+            Integer end = EnhancementEngineHelper.get(graph,textAnnotation, 
+                ENHANCER_END,Integer.class,lf);
+            Double confidence = EnhancementEngineHelper.get(graph, textAnnotation, 
+                ENHANCER_CONFIDENCE, Double.class, lf);
             Iterator<UriRef> types = getReferences(graph, textAnnotation, DC_TYPE);
             if(!types.hasNext()){ //create an iterator over null in case no types are present
                 types = Collections.singleton((UriRef)null).iterator();
             }
             while(types.hasNext()){
                 UriRef type = types.next();
-                Map<String,EntityExtractionSummary> occurrenceMap = extractionsByTypeMap.get(type);
+                Map<EntityExtractionSummary,EntityExtractionSummary> occurrenceMap = extractionsByTypeMap.get(type);
                 if(occurrenceMap == null){
-                    occurrenceMap = new TreeMap<String,EntityExtractionSummary>(String.CASE_INSENSITIVE_ORDER);
+                    occurrenceMap = new TreeMap<EntityExtractionSummary,EntityExtractionSummary>();
                     extractionsByTypeMap.put(type, occurrenceMap);
                 }
-                EntityExtractionSummary entity = occurrenceMap.get(text);
-                if(entity == null){
-                    entity = new EntityExtractionSummary(text, type, defaultThumbnails);
-                    occurrenceMap.put(text, entity);
-                }
+                EntityExtractionSummary entity = new EntityExtractionSummary(text, type, start,end,confidence,defaultThumbnails);
                 Collection<NonLiteral> suggestions = suggestionMap.get(textAnnotation);
                 if(suggestions != null){
                     for(NonLiteral entityAnnotation : suggestions){
@@ -379,10 +384,106 @@ public class ContentItemResource extends
                             graph);
                     }
                 }
+                EntityExtractionSummary existingSummary = occurrenceMap.get(entity);
+                if(existingSummary == null){//new extraction summary
+                    occurrenceMap.put(entity, entity);
+                } else {
+                    //extraction summary with this text and suggestions already
+                    //present ... only add a mention to the existing
+                    existingSummary.addMention(new Mention(text, start, end, confidence));
+                }
             }
         }
     }
-
+    /**
+     * Mentions of {@link EntityExtractionSummary EntityExtractionSummaries}:
+     * the mentioned text with its optional start/end position and its
+     * optional confidence.
+     * <p>
+     * {@link #equals(Object)} and {@link #hashCode()} consider the (case
+     * sensitive) name and the start/end position; the confidence is ignored.
+     * The natural order sorts ascending by name (ignoring case), than by
+     * start and end position (missing positions last). Remaining ties are
+     * broken by the case sensitive name so that <code>compareTo(..) == 0</code>
+     * holds exactly for equal mentions.
+     * @author Rupert Westenthaler
+     *
+     */
+    public static class Mention implements Comparable<Mention>{
+        private final String name;
+        private final Integer start;
+        private final Integer end;
+        private final Double conf;
+
+        /**
+         * @param name the mentioned text (MUST NOT be <code>null</code>)
+         * @param start the start position or <code>null</code> if not known
+         * @param end the end position or <code>null</code> if not known
+         * @param confidence the confidence or <code>null</code> if not known
+         * @throws IllegalStateException if the parsed name is <code>null</code>
+         */
+        Mention(String name,Integer start, Integer end, Double confidence){
+            if(name == null){
+                throw new IllegalStateException("The name for a Mention MUST NOT be NULL!");
+            }
+            this.name = name;
+            this.start = start;
+            this.end = end;
+            this.conf = confidence;
+        }
+        
+        public String getName() {
+            return name;
+        }
+        public Integer getStart() {
+            return start;
+        }
+        public Integer getEnd() {
+            return end;
+        }
+        public Double getConfidence() {
+            return conf;
+        }
+        /**
+         * @return <code>true</code> only if both start and end are present
+         */
+        public boolean hasOccurrence() {
+            return start != null && end != null;
+        }
+        public boolean hasConfidence(){
+            return conf != null;
+        }
+        @Override
+        public int hashCode() {
+            return name.hashCode() + 
+                    (start != null ? start.hashCode() : 0) +
+                    (end != null ? end.hashCode() : 0);
+        }
+        
+        @Override
+        public boolean equals(Object obj) {
+            if(this == obj){
+                return true;
+            }
+            if(!(obj instanceof Mention)){
+                return false;
+            }
+            Mention o = (Mention)obj;
+            return name.equals(o.name) && nullSafeEquals(start, o.start)
+                    && nullSafeEquals(end, o.end);
+        }
+        
+        @Override
+        public int compareTo(Mention o) {
+            //ascending by name (this first, other second)
+            int c = String.CASE_INSENSITIVE_ORDER.compare(this.name, o.name);
+            if(c == 0){
+                c = compareNullsLast(start, o.start);
+            }
+            if(c == 0){
+                c = compareNullsLast(end, o.end);
+            }
+            if(c == 0){
+                //names that only differ by case are not equal ... use the
+                //case sensitive order to stay consistent with equals
+                c = name.compareTo(o.name);
+            }
+            return c;
+        }
+        /** <code>true</code> if both are <code>null</code> or both are equal */
+        private static boolean nullSafeEquals(Integer a, Integer b){
+            return a == null ? b == null : a.equals(b);
+        }
+        /** Natural order of the values with <code>null</code> sorted last */
+        private static int compareNullsLast(Integer a, Integer b){
+            if(a == null){
+                return b == null ? 0 : 1;
+            }
+            return b == null ? -1 : a.compareTo(b);
+        }
+    }
+    
     public ChainExecution getChainExecution(){
         return chainExecution;
     }
@@ -439,29 +540,49 @@ public class ContentItemResource extends
 
         protected final String name;
 
+        
         protected final UriRef type;
 
         protected List<EntitySuggestion> suggestions = new ArrayList<EntitySuggestion>();
+        protected Set<UriRef> suggestionSet = new HashSet<UriRef>();
 
-        protected List<String> mentions = new ArrayList<String>();
+        protected List<Mention> mentions = new ArrayList<Mention>();
 
         public final Map<UriRef,String> defaultThumbnails;
 
-        public EntityExtractionSummary(String name, UriRef type, Map<UriRef,String> defaultThumbnails) {
+
+        private Integer start;
+
+        private Integer end;
+
+
+        private Double confidence;
+
+        /**
+         * Creates a summary and registers the parsed name, position and
+         * confidence as its first {@link Mention}.
+         * @param name the name of the extraction; also used as name of the
+         * first mention (the Mention constructor rejects <code>null</code>)
+         * @param type the type; also parsed to the suggestions created by
+         * {@link #addSuggestion(UriRef, String, Double, TripleCollection)}
+         * @param start the start position (may be <code>null</code>)
+         * @param end the end position (may be <code>null</code>)
+         * @param confidence the confidence (may be <code>null</code>); returned
+         * by {@link #getConfidence()} when no best guess is available
+         * @param defaultThumbnails stored as is and parsed to created suggestions
+         */
+        public EntityExtractionSummary(String name, UriRef type, Integer start, Integer end, Double confidence, Map<UriRef,String> defaultThumbnails) {
             this.name = name;
             this.type = type;
-            mentions.add(name);
+            mentions.add(new Mention(name, start, end, confidence));
             this.defaultThumbnails = defaultThumbnails;
+            this.start = start;
+            this.end = end;
+            this.confidence = confidence;
         }
 
+        /**
+         * Adds a suggested entity to this summary. The URI is always recorded
+         * in the set of suggested URIs (used by {@link #compareTo} and
+         * equals); the suggestion itself is only added - followed by a
+         * re-sort of the list - if an equal suggestion is not yet contained.
+         * @param uri the URI of the suggested entity
+         * @param label the label of the suggested entity
+         * @param confidence the confidence of the suggestion
+         * @param properties parsed as is to the created suggestion
+         */
         public void addSuggestion(UriRef uri, String label, Double confidence, TripleCollection properties) {
             EntitySuggestion suggestion = new EntitySuggestion(uri, type, label, confidence, properties,
                     defaultThumbnails);
+            suggestionSet.add(uri);
             if (!suggestions.contains(suggestion)) {
                 suggestions.add(suggestion);
                 Collections.sort(suggestions);
             }
         }
+        /**
+         * Registers an additional mention for this summary. Mentions equal to
+         * an already registered one are ignored; otherwise the mention is
+         * added and the list of mentions is sorted again.
+         * @param mention the mention to add
+         */
+        public void addMention(Mention mention){
+            if(mentions.contains(mention)){
+                return; //already registered ... nothing to do
+            }
+            mentions.add(mention);
+            Collections.sort(mentions);
+        }
 
         public String getName() {
             EntitySuggestion bestGuess = getBestGuess();
@@ -470,7 +591,9 @@ public class ContentItemResource extends
             }
             return name;
         }
-
+        /**
+         * Getter for the name as parsed to the constructor. Other than
+         * {@link #getName()} this does not consult {@link #getBestGuess()}.
+         * @return the name this summary was created with
+         */
+        public String getSelected(){
+            return name;
+        }
         public String getUri() {
             EntitySuggestion bestGuess = getBestGuess();
             if (bestGuess != null) {
@@ -478,6 +601,13 @@ public class ContentItemResource extends
             }
             return null;
         }
+        /**
+         * Getter for the confidence of this summary: the confidence of the
+         * best guess if there is one, otherwise the confidence parsed to the
+         * constructor.
+         * @return the confidence (may be <code>null</code>)
+         */
+        public Double getConfidence(){
+            EntitySuggestion best = getBestGuess();
+            if (best == null) {
+                //no suggestion ... fall back to the own confidence
+                return confidence;
+            }
+            return best.getConfidence();
+        }
 
         public String getSummary() {
             if (suggestions.isEmpty()) {
@@ -485,7 +615,15 @@ public class ContentItemResource extends
             }
             return suggestions.get(0).getSummary();
         }
-
+        /**
+         * @return the start position parsed to the constructor
+         * (may be <code>null</code>)
+         */
+        public Integer getStart() {
+            return start;
+        }
+        /**
+         * @return the end position parsed to the constructor
+         * (may be <code>null</code>)
+         */
+        public Integer getEnd() {
+            return end;
+        }
+        /**
+         * @return <code>false</code> if the start or the end position is
+         * missing; <code>true</code> if both are present
+         */
+        public boolean hasOccurrence(){
+            return !(start == null || end == null);
+        }
         public String getThumbnailSrc() {
             if (suggestions.isEmpty()) {
                 return getMissingThumbnailSrc();
@@ -507,18 +645,41 @@ public class ContentItemResource extends
             }
             return suggestions.get(0);
         }
-
+        
+        /**
+         * @return the list of suggestions held by this summary. This is the
+         * internal list (no copy), so changes write through.
+         */
         public List<EntitySuggestion> getSuggestions() {
             return suggestions;
         }
 
-        public List<String> getMentions() {
+        /**
+         * @return the list of mentions held by this summary. This is the
+         * internal list (no copy), so changes write through.
+         */
+        public List<Mention> getMentions() {
             return mentions;
         }
 
+        /**
+         * Compares by {@link #getName()} ignoring case. Summaries with the same
+         * name and the same set of suggested URIs are treated as equal
+         * (<code>0</code>). Otherwise the start position and, on a tie, the
+         * end position decide.
+         * <p>
+         * NOTE(review): summaries with the same name, different suggested URIs
+         * and equal (or missing) positions also result in <code>0</code>.
+         * Mixing "same suggestions" with "order by position" also looks
+         * non-transitive (A == C by suggestions while A &lt; B &lt; C by
+         * position) - confirm that this is acceptable for sorted collections
+         * using this order.<br>
+         * NOTE(review): a missing start sorts last while a missing end sorts
+         * first - TODO confirm that this is intended.
+         */
         @Override
         public int compareTo(EntityExtractionSummary o) {
-            return getName().compareTo(o.getName());
+            int c = String.CASE_INSENSITIVE_ORDER.compare(getName(),o.getName());
+            if(c == 0){
+                if(suggestionSet.equals(o.suggestionSet)){
+                    return 0; //assume as equals if name and suggestionSet is the same
+                } else { //sort by mention
+                    if(start != null && o.start != null){
+                        c = start.compareTo(o.start);
+                    } else if(o.start != null){
+                        c = 1;
+                    } else if(start != null){
+                        c = -1;
+                    }
+                    if(c == 0){
+                        if(o.end != null && end != null){
+                            c = end.compareTo(o.end);
+                        } else if(o.end != null){
+                            c = -1;
+                        } else if(end != null){
+                            c = 1;
+                        }
+                    }
+                }
+            }
+            return c;
         }
 
         @Override
@@ -529,10 +690,14 @@ public class ContentItemResource extends
             if (o == null || getClass() != o.getClass()) {
                 return false;
             }
-
             EntityExtractionSummary that = (EntityExtractionSummary) o;
-
-            return !(name != null ? !name.equals(that.name) : that.name != null);
+            //Two summaries are equal if their names (ignoring case) and their
+            //sets of suggested URIs are the same. The name MUST be compared
+            //with the name of the OTHER summary: comparing getName() with
+            //itself is always true and would reduce this check to the
+            //suggestions only.
+            return getName().equalsIgnoreCase(that.getName())
+                    && suggestionSet.equals(that.suggestionSet);
         }
 
         @Override

Modified: incubator/stanbol/branches/disambiguation-engine/jersey/src/main/resources/org/apache/stanbol/enhancer/jersey/templates/imports/entities.ftl
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/disambiguation-engine/jersey/src/main/resources/org/apache/stanbol/enhancer/jersey/templates/imports/entities.ftl?rev=1379385&r1=1379384&r2=1379385&view=diff
==============================================================================
--- incubator/stanbol/branches/disambiguation-engine/jersey/src/main/resources/org/apache/stanbol/enhancer/jersey/templates/imports/entities.ftl (original)
+++ incubator/stanbol/branches/disambiguation-engine/jersey/src/main/resources/org/apache/stanbol/enhancer/jersey/templates/imports/entities.ftl Fri Aug 31 09:39:57 2012
@@ -31,6 +31,12 @@
     <#else>
     ${entity.name}
     </#if>
+    <br><span class="metadata">
+    <#-- positions are printed with ?c to avoid locale specific digit grouping;
+         the confidence is optional and only printed when present -->
+    <#if entity.name != entity.selected>for:'${entity.selected}',</#if>
+    <#if entity.mentions?size &gt; 1>${entity.mentions?size} mentions
+    <#else>
+      <#if entity.hasOccurrence()>pos:[${entity.start?c},${entity.end?c}]</#if></#if><#if entity.confidence??>,
+     conf:${entity.confidence?string("0.##")}</#if></span>
   </th>
 </tr>
 </thead>
@@ -44,7 +50,9 @@
 <tr>
   <td class="thumb"><img src="${suggestion.thumbnailSrc}"
     onerror="$(this).attr('src', '${suggestion.missingThumbnailSrc}');" alt="${suggestion.label}" /></td>
-  <td><a href="${suggestion.uri}" title="${suggestion.summary}" class="external">${suggestion.label}</a></td>
+  <td><a href="${suggestion.uri}" title="${suggestion.summary}" class="external">
+  	${suggestion.label}</a><br>
+  	<#-- the confidence of a suggestion may be missing: only print it when present -->
+  	<span class="metadata"><#if suggestion.confidence??>conf:${suggestion.confidence?string("0.##")}</#if></span></td>
 </tr>
 </#list>
 <#if entity.mentions?size != 0>
@@ -55,7 +63,12 @@
 <#list entity.mentions as mention>
 <tr>
   <td></td>
-  <td>${mention}</td>
+  <td>${mention.name}<br><span class="metadata">
+  <#-- ?c avoids locale specific digit grouping within the position; the
+       confidence uses the same format as for entities and suggestions -->
+  <#if mention.hasOccurrence()>
+    pos:[${mention.start?c},${mention.end?c}]
+   </#if>
+   <#if mention.hasConfidence()>
+    <#if mention.hasOccurrence()>, </#if>conf: ${mention.confidence?string("0.##")}</#if></span></td>
 </tr>
 </#list>
 </tbody>