You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2014/05/23 13:36:29 UTC

svn commit: r1597063 - in /stanbol/branches/release-0.12/enhancement-engines: dereference/entityhub/src/main/resources/OSGI-INF/metatype/ entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/ entitycomention/src/main/java/o...

Author: rwesten
Date: Fri May 23 11:36:28 2014
New Revision: 1597063

URL: http://svn.apache.org/r1597063
Log:
fix for STANBOL-1342: The Entity Co-Mention engine now uses a ServiceTracker for LabelTokenizers; In case the LabelTokenizer is not able to tokenize the selected text of a TextAnnotation, that TextAnnotation will be ignored (not considered for co-mention detection); Minor: removed the custom ServiceReference Comparator from the MainLabelTokenizer as ServiceReference is comparable anyway

Modified:
    stanbol/branches/release-0.12/enhancement-engines/dereference/entityhub/src/main/resources/OSGI-INF/metatype/metatype.properties
    stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java
    stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/ContentItemMentionBuilder.java
    stanbol/branches/release-0.12/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/MainLabelTokenizer.java

Modified: stanbol/branches/release-0.12/enhancement-engines/dereference/entityhub/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/dereference/entityhub/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1597063&r1=1597062&r2=1597063&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/dereference/entityhub/src/main/resources/OSGI-INF/metatype/metatype.properties (original)
+++ stanbol/branches/release-0.12/enhancement-engines/dereference/entityhub/src/main/resources/OSGI-INF/metatype/metatype.properties Fri May 23 11:36:28 2014
@@ -29,7 +29,7 @@ Engine that dereferences Entities by usi
 , SiteManager as well as the Entityhub itself.
 
 #===============================================================================
-#Properties and Options used to configure 
+#Properties and Options used to configure the Entityhub dereference engine 
 #===============================================================================
 enhancer.engines.dereference.fields.name=Dereferenced Fields
 enhancer.engines.dereference.fields.description=The fields that are dereferenced. \
@@ -91,3 +91,16 @@ are dereferenced. If empty all languages
 Only Content Language' option is activated). Enabled languages will be also \
 included if 'Only Content Language' is activated.
 
+#===============================================================================
+#Properties and Options used to configure the shared thread pool
+#=============================================================================== 
+
+org.apache.stanbol.enhancer.engines.dereference.entityhub.shared.SharedDereferenceThreadPool.name=\
+Apache Stanbol Enhancer Engine: Entityhub Dereference Shared Thread Pool
+org.apache.stanbol.enhancer.engines.dereference.entityhub.shared.SharedDereferenceThreadPool=\
+A shared thread pool used by all Entityhub Dereference Engines with the "Shared Thread Pool" \
+feature activated.
+
+enhancer.engines.dereference.entityhub.sharedthreadpool.size.name=Thread Pool Size
+enhancer.engines.dereference.entityhub.sharedthreadpool.size.description=The size \
+of the thread pool used to dereference entities from the Entityhub

Modified: stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java?rev=1597063&r1=1597062&r2=1597063&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java (original)
+++ stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/EntityCoMentionEngine.java Fri May 23 11:36:28 2014
@@ -86,8 +86,10 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.osgi.framework.BundleContext;
 import org.osgi.service.cm.ConfigurationException;
 import org.osgi.service.component.ComponentContext;
+import org.osgi.util.tracker.ServiceTracker;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 /**
@@ -155,8 +157,7 @@ public class EntityCoMentionEngine exten
     @Reference
     protected NamespacePrefixService prefixService;
     
-    @Reference 
-    protected LabelTokenizer labelTokenizer; 
+    private ServiceTracker labelTokenizerTracker;
 
     private double confidenceAdjustmentFactor;
     
@@ -226,6 +227,9 @@ public class EntityCoMentionEngine exten
         }
         confidenceAdjustmentFactor = 1 - confidenceAdjustment;
         //get the metadata later set to the enhancement engine
+        final BundleContext bc = ctx.getBundleContext();
+        labelTokenizerTracker = new ServiceTracker(bc, LabelTokenizer.class.getName(), null); 
+        labelTokenizerTracker.open();
     }
     /**
      * Deactivates this components. 
@@ -235,6 +239,10 @@ public class EntityCoMentionEngine exten
         log.info("deactivate {}[name:{}]",getClass().getSimpleName(),getName());
         textProcessingConfig = null;
         linkerConfig = null;
+        if(labelTokenizerTracker != null){
+            labelTokenizerTracker.close();
+            labelTokenizerTracker = null;
+        }
         super.deactivate(ctx);
     }
     
@@ -267,6 +275,11 @@ public class EntityCoMentionEngine exten
             log.debug("compute co-mentions for ContentItem {} language {}  text={}", 
                 new Object []{ci.getUri().getUnicodeString(), language, StringUtils.abbreviate(at.getSpan(), 100)});
         }
+        LabelTokenizer labelTokenizer = (LabelTokenizer)labelTokenizerTracker.getService();
+        if(labelTokenizer == null){
+            throw new EngineException(this, ci, "No LabelTokenizer available!",null);
+        }
+
         //create the in-memory database for the mentioned Entities
         ContentItemMentionBuilder entityMentionIndex = new ContentItemMentionBuilder(
             labelTokenizer, language, linkerConfig.getDefaultLanguage());
@@ -283,7 +296,7 @@ public class EntityCoMentionEngine exten
             ci.getLock().readLock().unlock();
         }
         EntityLinker entityLinker = new EntityLinker(at,language, 
-            languageConfig, entityMentionIndex, linkerConfig, labelTokenizer,entityMentionIndex);
+            languageConfig, entityMentionIndex, linkerConfig, labelTokenizer ,entityMentionIndex);
         //process
         try {
             entityLinker.process();

Modified: stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/ContentItemMentionBuilder.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/ContentItemMentionBuilder.java?rev=1597063&r1=1597062&r2=1597063&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/ContentItemMentionBuilder.java (original)
+++ stanbol/branches/release-0.12/enhancement-engines/entitycomention/src/main/java/org/apache/stanbol/enhancer/engines/entitycomention/impl/ContentItemMentionBuilder.java Fri May 23 11:36:28 2014
@@ -73,7 +73,7 @@ public class ContentItemMentionBuilder e
             //      Nouns for the co-mention resolution. In such cases this might result
             //      in additional extractions.
             String[] tokens = tokenizer.tokenize(selectedText, language);
-            if(tokens.length > 1){ //TODO make configurable
+            if(tokens != null && tokens.length > 1){ //TODO make configurable
                 Double confidence = EnhancementEngineHelper.get(metadata,textAnnotation,ENHANCER_CONFIDENCE,Double.class,lf);
                 if(confidence == null || confidence > 0.85){ //TODO make configurable
                     Integer start = EnhancementEngineHelper.get(metadata,textAnnotation,ENHANCER_START,Integer.class,lf);
@@ -81,6 +81,9 @@ public class ContentItemMentionBuilder e
                     registerMention(new EntityMention(textAnnotation,metadata, ENHANCER_SELECTED_TEXT, DC_TYPE, 
                         start != null && end != null ? new Integer[]{start,end} : null));
                 } // else confidence to low
+            } else if(tokens == null){
+                log.warn("Unable to tokenize \"{}\"@{} via tokenizer {} (class: {})!", new Object []{
+                    selectedText,language,tokenizer, tokenizer.getClass().getName()});
             } //else ignore Tokens with a single token
         } // else no selected text
     }

Modified: stanbol/branches/release-0.12/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/MainLabelTokenizer.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/MainLabelTokenizer.java?rev=1597063&r1=1597062&r2=1597063&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/MainLabelTokenizer.java (original)
+++ stanbol/branches/release-0.12/enhancement-engines/entitylinking/engine/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/MainLabelTokenizer.java Fri May 23 11:36:28 2014
@@ -19,7 +19,6 @@ package org.apache.stanbol.enhancer.engi
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
-import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -56,28 +55,6 @@ public class MainLabelTokenizer implemen
     
     private ServiceTracker labelTokenizerTracker;
     
-    private static final Comparator<ServiceReference> RANKING_COMPARATOR = new Comparator<ServiceReference>() {
-        
-        public int compare(ServiceReference ref1, ServiceReference ref2) {
-            int r1,r2;
-            Object tmp = ref1.getProperty(Constants.SERVICE_RANKING);
-            r1 = tmp != null ? ((Integer)tmp).intValue() : 0;
-            tmp = ref2.getProperty(Constants.SERVICE_RANKING);
-            r2 = tmp != null ? ((Integer)tmp).intValue() : 0;
-            if(r1 == r2){
-                tmp = ref1.getProperty(Constants.SERVICE_ID);
-                long id1 = tmp != null ? ((Long)tmp).longValue() : Long.MAX_VALUE;
-                tmp = ref2.getProperty(Constants.SERVICE_ID);
-                long id2 = tmp != null ? ((Long)tmp).longValue() : Long.MAX_VALUE;
-                //the lowest id must be first -> id1 < id2 -> [id1,id2] -> return -1
-                return id1 < id2 ? -1 : id2 == id1 ? 0 : 1; 
-            } else {
-                //the highest ranking MUST BE first -> r1 < r2 -> [r2,r1] -> return 1
-                return r1 < r2 ? 1:-1;
-            }
-        }        
-    };
-    
     private Map<ServiceReference,LanguageConfiguration> ref2LangConfig = 
             Collections.synchronizedMap(new HashMap<ServiceReference,LanguageConfiguration>());
     
@@ -185,6 +162,7 @@ public class MainLabelTokenizer implemen
     }
 
     
+    @SuppressWarnings("unchecked")
     private List<ServiceReference> initTokenizers(String language) {
         List<ServiceReference> tokenizers = new ArrayList<ServiceReference>();
         if(labelTokenizerTracker.getServiceReferences() != null){
@@ -196,7 +174,7 @@ public class MainLabelTokenizer implemen
             }
         }
         if(tokenizers.size() > 1){
-            Collections.sort(tokenizers,RANKING_COMPARATOR);
+            Collections.sort(tokenizers);
         }
         this.langTokenizers.put(language, tokenizers);
         return tokenizers;