You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2013/11/24 12:01:07 UTC
svn commit: r1544957 - in
/stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main:
java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java
resources/OSGI-INF/metatype/metatype.properties
Author: rwesten
Date: Sun Nov 24 11:01:07 2013
New Revision: 1544957
URL: http://svn.apache.org/r1544957
Log:
implementation of STANBOL-1219 for the 0.12 releasing branch
Modified:
stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java
stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/resources/OSGI-INF/metatype/metatype.properties
Modified: stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java?rev=1544957&r1=1544956&r2=1544957&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java (original)
+++ stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java Sun Nov 24 11:01:07 2013
@@ -140,13 +140,26 @@ import org.slf4j.LoggerFactory;
"de;uc=MATCH", //in German all Nouns are upper case
"es;lc=Noun", //the OpenNLP POS tagger for Spanish does not support ProperNouns
"nl;lc=Noun"}), //same for Dutch
- //@Property(name=DEFAULT_MATCHING_LANGUAGE,value=""), //will only be used when adding alt label support
+ @Property(name=EntityCoMentionEngine.ADJUST_EXISTING_SUGGESTION_CONFIDENCE,
+ doubleValue=EntityCoMentionEngine.DEFAULT_CONFIDENCE_ADJUSTEMENT),
@Property(name=SERVICE_RANKING,intValue=0)
})
@Service(value=EnhancementEngine.class)
public class EntityCoMentionEngine extends AbstractEnhancementEngine<RuntimeException,RuntimeException> implements ServiceProperties {
+ /**
+ * Property used to configure if/how confidence values of existing suggestions
+ * are modified if a co-mention is detected for a fise:TextAnnotation.<p>
+ * Values MUST be in the range [0..1); the
+ * {@link #DEFAULT_CONFIDENCE_ADJUSTEMENT default} is <code>0.33</code>.<p>
+ * Added with <a href="https://issues.apache.org/jira/browse/STANBOL-1219">STANBOL-1219</a>
+ */
+ public static final String ADJUST_EXISTING_SUGGESTION_CONFIDENCE = "enhancer.engines.comention.adjustExistingConfidence";
/**
+ * Default value for {@link #ADJUST_EXISTING_SUGGESTION_CONFIDENCE}
+ */
+ public static final double DEFAULT_CONFIDENCE_ADJUSTEMENT = 0.33;
+ /**
* first of the post processing engines (note STANBOL-1218)
*/
private static final Integer ENGINE_ORDERING = ServiceProperties.ORDERING_POST_PROCESSING + 80;
@@ -166,6 +179,8 @@ public class EntityCoMentionEngine exten
@Reference
protected LabelTokenizer labelTokenizer;
+ private double confidenceAdjustmentFactor;
+
// private BundleContext bundleContext;
/**
* EntityLinking configuration used for Co-Mention extractions
@@ -208,6 +223,29 @@ public class EntityCoMentionEngine exten
for(UriRef mappedUri : mappedUris){
linkerConfig.setTypeMapping(mappedUri.getUnicodeString(), null);
}
+ //parse confidence adjustment value (STANBOL-1219)
+ Object value = properties.get(ADJUST_EXISTING_SUGGESTION_CONFIDENCE);
+ final double confidenceAdjustment;
+ if(value == null){
+ confidenceAdjustment = DEFAULT_CONFIDENCE_ADJUSTEMENT;
+ } else if(value instanceof Number){
+ confidenceAdjustment = ((Number)value).doubleValue();
+ } else {
+ try {
+ confidenceAdjustment = Double.parseDouble(value.toString());
+ } catch (NumberFormatException e){
+ throw new ConfigurationException(ADJUST_EXISTING_SUGGESTION_CONFIDENCE,
+ "The confidence adjustement value for existing suggestions "
+ + "MUST BE a double value in the range [0..1)", e);
+ }
+ }
+ if(confidenceAdjustment < 0 || confidenceAdjustment >= 1){
+ throw new ConfigurationException(ADJUST_EXISTING_SUGGESTION_CONFIDENCE,
+ "The confidence adjustement value for existing suggestions "
+ + "MUST BE a double value in the range [0..1) (parsed: "
+ + confidenceAdjustment +")!");
+ }
+ confidenceAdjustmentFactor = 1 - confidenceAdjustment;
//get the metadata later set to the enhancement engine
}
/**
@@ -330,6 +368,7 @@ public class EntityCoMentionEngine exten
if(!ignore){
//collect confidence values of co-mentions
Double maxConfidence = null;
+ Double maxExistingConfidence = null;
if(textAnnotation == null){ //not found ... create a new TextAnnotation for the co-mention
textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
metadata.add(new TripleImpl(textAnnotation,
@@ -369,6 +408,26 @@ public class EntityCoMentionEngine exten
maxConfidence = confidnece;
}
}
+ Map<NonLiteral, Double> existingSuggestions = new HashMap<NonLiteral,Double>();
+ if(maxConfidence != null && confidenceAdjustmentFactor < 1){
+ //adapt confidence of existing annotations
+ for(Iterator<Triple> esIt = metadata.filter(null, DC_RELATION, textAnnotation);esIt.hasNext();){
+ NonLiteral existingSuggestion = esIt.next().getSubject();
+ existingSuggestions.put(existingSuggestion,
+ EnhancementEngineHelper.get(metadata, existingSuggestion,
+ ENHANCER_CONFIDENCE, Double.class, literalFactory));
+ }
+ for(Entry<NonLiteral,Double> entry : existingSuggestions.entrySet()){
+ if(entry.getValue() != null){
+ double adjustedConfidence = entry.getValue() * confidenceAdjustmentFactor;
+ if(maxExistingConfidence == null || adjustedConfidence > maxExistingConfidence){
+ maxExistingConfidence = adjustedConfidence;
+ }
+ EnhancementEngineHelper.set(metadata, entry.getKey(),
+ ENHANCER_CONFIDENCE, adjustedConfidence, literalFactory);
+ }
+ }
+ }
//add the suggestions of the initial mention to this one
Set<Resource> values = new HashSet<Resource>();
for(Iterator<Triple> suggestions = metadata.filter(null, DC_RELATION, initialMention); suggestions.hasNext();){
@@ -382,13 +441,29 @@ public class EntityCoMentionEngine exten
metadata.add(new TripleImpl(textAnnotation, DC_RELATION, initialMention));
//metadata.add(new TripleImpl(initialMention, DC_RELATION, textAnnotation));
}
- //finally add the collected dc:types of initial mentions to the textAnnotation
+ // Adapt the dc:type values of the fise:TextAnnotation:
+ // - if the suggestions added by this engine have the max confidence,
+ // use the dc:type values of the initial mention
+ // - if the original suggestions have a higher confidence, keep the
+ // existing dc:type values
+ // - if both have the same confidence, add all dc:types
+ boolean removeExistingDcTypes = maxConfidence != null && (maxExistingConfidence == null ||
+ maxConfidence.compareTo(maxExistingConfidence) >= 0);
+ boolean addCoMentionDcTypes = maxExistingConfidence == null ||
+ (maxConfidence != null && maxConfidence.compareTo(maxExistingConfidence) >= 1);
Iterator<UriRef> existingDcTypesIt = getReferences(metadata, textAnnotation, DC_TYPE);
while(existingDcTypesIt.hasNext()){ //do not add existing
- dcTypes.remove(existingDcTypesIt.next());
+ //remove dc:type triples if they are not re-added later and
+ //removeExistingDcTypes == true
+ if((!dcTypes.remove(existingDcTypesIt.next()) || !addCoMentionDcTypes )
+ && removeExistingDcTypes){
+ existingDcTypesIt.remove(); //remove the dcType
+ }
}
- for(UriRef dcType : dcTypes){ //add missing
- metadata.add(new TripleImpl(textAnnotation, DC_TYPE, dcType));
+ if(addCoMentionDcTypes){
+ for(UriRef dcType : dcTypes){ //add missing
+ metadata.add(new TripleImpl(textAnnotation, DC_TYPE, dcType));
+ }
}
//TODO: support also Entities
if(maxConfidence != null){ //set the confidence value (if known)
Modified: stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1544957&r1=1544956&r2=1544957&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/resources/OSGI-INF/metatype/metatype.properties (original)
+++ stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/resources/OSGI-INF/metatype/metatype.properties Sun Nov 24 11:01:07 2013
@@ -58,3 +58,10 @@ used in addition to the language detecte
configuration is an empty string to search for labels without any language defined, but for some data \
sets (such as DBpedia.org) that add languages to any labels it might improve results to change this \
configuration (e.g. to 'en' in the case of DBpedia.org).
+
+enhancer.engines.comention.adjustExistingConfidence.name=Confidence Adjustment
+enhancer.engines.comention.adjustExistingConfidence.description=Used to adjust the \
+confidence of existing suggestions for fise:TextAnnotation where a Co-Mention is \
+detected by this Engine. Values MUST BE in the range [0..1) (default: 0.33). Setting \
+this to 0.0 will deactivate this feature. The adjusted confidence is calculated as \
+{adjusted-confidence} := {confidence} * (1 - {value}), where {value} is the value configured for this property.