You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2014/05/23 13:36:29 UTC
svn commit: r1597063 - in /stanbol/branches/release-0.12/enhancement-engines:
dereference/entityhub/src/main/resources/OSGI-INF/metatype/
entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/
entitycomention/src/main/java/o...
Author: rwesten
Date: Fri May 23 11:36:28 2014
New Revision: 1597063
URL: http://svn.apache.org/r1597063
Log:
fix for STANBOL-1342: The Entity Co-Mention engine now uses a ServiceTracker for LabelTokenizers; In case the LabelTokenizer is not able to tokenize the selected text of a TextAnnotation it will be ignored (not considered for co-mention detection); Minor: removed the custom ServiceReference Comparator from the MainLabelTokenizer as ServiceReference is anyway comparable
Modified:
stanbol/branches/release-0.12/enhancement-engines/dereference/entityhub/src/main/resources/OSGI-INF/metatype/metatype.properties
stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java
stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/ContentItemMentionBuilder.java
stanbol/branches/release-0.12/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/MainLabelTokenizer.java
Modified: stanbol/branches/release-0.12/enhancement-engines/dereference/entityhub/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/dereference/entityhub/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1597063&r1=1597062&r2=1597063&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/dereference/entityhub/src/main/resources/OSGI-INF/metatype/metatype.properties (original)
+++ stanbol/branches/release-0.12/enhancement-engines/dereference/entityhub/src/main/resources/OSGI-INF/metatype/metatype.properties Fri May 23 11:36:28 2014
@@ -29,7 +29,7 @@ Engine that dereferences Entities by usi
, SiteManager as well as the Entityhub itself.
#===============================================================================
-#Properties and Options used to configure
+#Properties and Options used to configure the Entityhub dereference engine
#===============================================================================
enhancer.engines.dereference.fields.name=Dereferenced Fields
enhancer.engines.dereference.fields.description=The fields that are dereferenced. \
@@ -91,3 +91,16 @@ are dereferenced. If empty all languages
Only Content Language' option is activated). Enabled languages will be also \
included if 'Only Content Language' is activated.
+#===============================================================================
+#Properties and Options used to configure the shared thread pool
+#===============================================================================
+
+org.apache.stanbol.enhancer.engines.dereference.entityhub.shared.SharedDereferenceThreadPool.name=\
+Apache Stanbol Enhancer Engine: Entityhub Dereference Shared Thread Pool
+org.apache.stanbol.enhancer.engines.dereference.entityhub.shared.SharedDereferenceThreadPool=\
+A shared thread pool used by all Entityhub Dereference Engines with the "Shared Thread Pool" \
+feature activated.
+
+enhancer.engines.dereference.entityhub.sharedthreadpool.size.name=Thread Pool Size
+enhancer.engines.dereference.entityhub.sharedthreadpool.size.description=The size \
+of the thread pool used to dereference entities from the Entityhub
Modified: stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java?rev=1597063&r1=1597062&r2=1597063&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java (original)
+++ stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java Fri May 23 11:36:28 2014
@@ -86,8 +86,10 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.osgi.framework.BundleContext;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
+import org.osgi.util.tracker.ServiceTracker;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
@@ -155,8 +157,7 @@ public class EntityCoMentionEngine exten
@Reference
protected NamespacePrefixService prefixService;
- @Reference
- protected LabelTokenizer labelTokenizer;
+ private ServiceTracker labelTokenizerTracker;
private double confidenceAdjustmentFactor;
@@ -226,6 +227,9 @@ public class EntityCoMentionEngine exten
}
confidenceAdjustmentFactor = 1 - confidenceAdjustment;
//get the metadata later set to the enhancement engine
+ final BundleContext bc = ctx.getBundleContext();
+ labelTokenizerTracker = new ServiceTracker(bc, LabelTokenizer.class.getName(), null);
+ labelTokenizerTracker.open();
}
/**
* Deactivates this components.
@@ -235,6 +239,10 @@ public class EntityCoMentionEngine exten
log.info("deactivate {}[name:{}]",getClass().getSimpleName(),getName());
textProcessingConfig = null;
linkerConfig = null;
+ if(labelTokenizerTracker != null){
+ labelTokenizerTracker.close();
+ labelTokenizerTracker = null;
+ }
super.deactivate(ctx);
}
@@ -267,6 +275,11 @@ public class EntityCoMentionEngine exten
log.debug("compute co-mentions for ContentItem {} language {} text={}",
new Object []{ci.getUri().getUnicodeString(), language, StringUtils.abbreviate(at.getSpan(), 100)});
}
+ LabelTokenizer labelTokenizer = (LabelTokenizer)labelTokenizerTracker.getService();
+ if(labelTokenizer == null){
+ throw new EngineException(this, ci, "No LabelTokenizer available!",null);
+ }
+
//create the in-memory database for the mentioned Entities
ContentItemMentionBuilder entityMentionIndex = new ContentItemMentionBuilder(
labelTokenizer, language, linkerConfig.getDefaultLanguage());
@@ -283,7 +296,7 @@ public class EntityCoMentionEngine exten
ci.getLock().readLock().unlock();
}
EntityLinker entityLinker = new EntityLinker(at,language,
- languageConfig, entityMentionIndex, linkerConfig, labelTokenizer,entityMentionIndex);
+ languageConfig, entityMentionIndex, linkerConfig, labelTokenizer ,entityMentionIndex);
//process
try {
entityLinker.process();
Modified: stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/ContentItemMentionBuilder.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/ContentItemMentionBuilder.java?rev=1597063&r1=1597062&r2=1597063&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/ContentItemMentionBuilder.java (original)
+++ stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/ContentItemMentionBuilder.java Fri May 23 11:36:28 2014
@@ -73,7 +73,7 @@ public class ContentItemMentionBuilder e
// Nouns for the co-mention resolution. In such cases this might result
// in additional extractions.
String[] tokens = tokenizer.tokenize(selectedText, language);
- if(tokens.length > 1){ //TODO make configurable
+ if(tokens != null && tokens.length > 1){ //TODO make configurable
Double confidence = EnhancementEngineHelper.get(metadata,textAnnotation,ENHANCER_CONFIDENCE,Double.class,lf);
if(confidence == null || confidence > 0.85){ //TODO make configurable
Integer start = EnhancementEngineHelper.get(metadata,textAnnotation,ENHANCER_START,Integer.class,lf);
@@ -81,6 +81,9 @@ public class ContentItemMentionBuilder e
registerMention(new EntityMention(textAnnotation,metadata, ENHANCER_SELECTED_TEXT, DC_TYPE,
start != null && end != null ? new Integer[]{start,end} : null));
} // else confidence to low
+ } else if(tokens == null){
+ log.warn("Unable to tokenize \"{}\"@{} via tokenizer {} (class: {})!", new Object []{
+ selectedText,language,tokenizer, tokenizer.getClass().getName()});
} //else ignore Tokens with a single token
} // else no selected text
}
Modified: stanbol/branches/release-0.12/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/MainLabelTokenizer.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/MainLabelTokenizer.java?rev=1597063&r1=1597062&r2=1597063&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/MainLabelTokenizer.java (original)
+++ stanbol/branches/release-0.12/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/MainLabelTokenizer.java Fri May 23 11:36:28 2014
@@ -19,7 +19,6 @@ package org.apache.stanbol.enhancer.engi
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
-import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -56,28 +55,6 @@ public class MainLabelTokenizer implemen
private ServiceTracker labelTokenizerTracker;
- private static final Comparator<ServiceReference> RANKING_COMPARATOR = new Comparator<ServiceReference>() {
-
- public int compare(ServiceReference ref1, ServiceReference ref2) {
- int r1,r2;
- Object tmp = ref1.getProperty(Constants.SERVICE_RANKING);
- r1 = tmp != null ? ((Integer)tmp).intValue() : 0;
- tmp = ref2.getProperty(Constants.SERVICE_RANKING);
- r2 = tmp != null ? ((Integer)tmp).intValue() : 0;
- if(r1 == r2){
- tmp = ref1.getProperty(Constants.SERVICE_ID);
- long id1 = tmp != null ? ((Long)tmp).longValue() : Long.MAX_VALUE;
- tmp = ref2.getProperty(Constants.SERVICE_ID);
- long id2 = tmp != null ? ((Long)tmp).longValue() : Long.MAX_VALUE;
- //the lowest id must be first -> id1 < id2 -> [id1,id2] -> return -1
- return id1 < id2 ? -1 : id2 == id1 ? 0 : 1;
- } else {
- //the highest ranking MUST BE first -> r1 < r2 -> [r2,r1] -> return 1
- return r1 < r2 ? 1:-1;
- }
- }
- };
-
private Map<ServiceReference,LanguageConfiguration> ref2LangConfig =
Collections.synchronizedMap(new HashMap<ServiceReference,LanguageConfiguration>());
@@ -185,6 +162,7 @@ public class MainLabelTokenizer implemen
}
+ @SuppressWarnings("unchecked")
private List<ServiceReference> initTokenizers(String language) {
List<ServiceReference> tokenizers = new ArrayList<ServiceReference>();
if(labelTokenizerTracker.getServiceReferences() != null){
@@ -196,7 +174,7 @@ public class MainLabelTokenizer implemen
}
}
if(tokenizers.size() > 1){
- Collections.sort(tokenizers,RANKING_COMPARATOR);
+ Collections.sort(tokenizers);
}
this.langTokenizers.put(language, tokenizers);
return tokenizers;