You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2015/04/16 09:51:12 UTC
svn commit: r1674012 - in /stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src: main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/ main/resources/OSGI-INF/metatype/ test/java/org/apache/stanbol/enhancer/engines/lucenefst...

Author: rwesten
Date: Thu Apr 16 07:51:12 2015
New Revision: 1674012

URL: http://svn.apache.org/r1674012
Log:
Fix for STANBOL-1416 and implementation of STANBOL-1418; also greatly improved the DEBUG-level logging of the FstLinkingEngineComponent. For details see the issues.

Added:
    stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityFstLinkingComponnet.java   (with props)
    stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityTokenFilter.java   (with props)
    stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/PlainFstLinkingComponnet.java   (with props)
Modified:
    stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngine.java
    stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineComponent.java
    stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/LinkableTokenFilter.java
    stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/LinkingModeEnum.java
    stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java
    stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/resources/OSGI-INF/metatype/metatype.properties
    stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/test/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineTest.java

Modified: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngine.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngine.java?rev=1674012&r1=1674011&r2=1674012&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngine.java (original)
+++ stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngine.java Thu Apr 16 07:51:12 2015
@@ -28,6 +28,7 @@ import static org.apache.stanbol.enhance
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
@@ -62,8 +63,8 @@ import org.apache.stanbol.enhancer.engin
 import org.apache.stanbol.enhancer.engines.lucenefstlinking.TaggingSession.Corpus;
 import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
 import org.apache.stanbol.enhancer.nlp.model.AnalysedTextUtils;
+import org.apache.stanbol.enhancer.nlp.ner.NerTag;
 import org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper;
-import org.apache.stanbol.enhancer.servicesapi.Blob;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
@@ -101,12 +102,23 @@ public class FstLinkingEngine implements
 
     protected final TextProcessingConfig tpConfig;
     protected final EntityLinkerConfig elConfig;
+    
+    /**
+     * Used in the {@link LinkingModeEnum#NER} mode to filter entities. The keys are
+     * the configured {@link NerTag#getType()} and {@link NerTag#getTag()} values; the
+     * value sets are the entity types compared against the {@link Match#getTypes()}. 
+     * A <code>null</code> value in a set is interpreted as wildcard (any type matches).
+     * An empty value set is interpreted as a blacklist (do not lookup Named Entities
+     * with that {@link NerTag#getType() type}/{@link NerTag#getTag() tag}).
+     */
+    protected final Map<String,Set<String>> neTypeMappings;
 
     private IndexConfiguration indexConfig;
 
     public FstLinkingEngine(String name, LinkingModeEnum linkingMode, 
             IndexConfiguration indexConfig,
-            TextProcessingConfig tpConfig, EntityLinkerConfig elConfig) {
+            TextProcessingConfig tpConfig, EntityLinkerConfig elConfig,
+            Map<String,Set<String>> neTypeMappings) {
         if (StringUtils.isBlank(name)) {
             throw new IllegalArgumentException("The parsed name MUST NOT be NULL nor blank!");
         }
@@ -124,6 +136,11 @@ public class FstLinkingEngine implements
             throw new IllegalArgumentException("The parsed Entity Linking configuration MUST NOT be NULL");
         }
         this.elConfig = elConfig;
+        if(linkingMode == LinkingModeEnum.NER && neTypeMappings == null){
+            throw new IllegalArgumentException("The NamedEntity type mappings MUST NOT be NULL "
+                    + "if the LinkingMode is NER!");
+        }
+        this.neTypeMappings = neTypeMappings;
     }
 
     @Override
@@ -155,9 +172,17 @@ public class FstLinkingEngine implements
         }
         // we need a detected language, the AnalyzedText contentPart with
         // Tokens.
-        AnalysedText at = getAnalysedText(this, ci, false);
-        if(at == null && linkingMode == LinkingModeEnum.PLAIN){
-            return NlpEngineHelper.getPlainText(this, ci, false) != null ? ENHANCE_ASYNC : CANNOT_ENHANCE;
+        AnalysedText at = AnalysedTextUtils.getAnalysedText(ci);
+        if(at == null){
+            if( linkingMode == LinkingModeEnum.PLAIN){
+                return NlpEngineHelper.getPlainText(this, ci, false) != null ? ENHANCE_ASYNC : CANNOT_ENHANCE;
+            } else {
+                log.warn("Unable to process {} with engine name={} and mode={} "
+                        + ": Missing AnalyzedText content part. Please ensure that "
+                        + "NLP processing results are available before FST linking!", 
+                        new Object[]{ci,name,linkingMode});
+                return CANNOT_ENHANCE;
+            }
         } else {
             if(linkingMode == LinkingModeEnum.PLAIN){
                 return ENHANCE_ASYNC;
@@ -167,7 +192,7 @@ public class FstLinkingEngine implements
                 log.warn("Unable to process {} with engine name={} and mode={} "
                     + "as the AnalyzedText does not contain any Tokens!", 
                     new Object[]{ci,name,linkingMode});
-                return at.getTokens().hasNext() ? ENHANCE_ASYNC : CANNOT_ENHANCE;
+                return CANNOT_ENHANCE;
             }
         }
     }
@@ -243,7 +268,7 @@ public class FstLinkingEngine implements
                     log.debug(" - sum fst: {} ms", taggingEnd - taggingStart);
                 }
             }
-            int matches = match(content,tags.values());
+            int matches = match(content, tags.values(), session.entityMentionTypes);
             log.debug(" - loaded {} ({} loaded, {} cached, {} appended) Matches in {} ms", 
                     new Object[]{matches, session.getSessionDocLoaded(),
                         session.getSessionDocCached(), session.getSessionDocAppended(),
@@ -273,7 +298,7 @@ public class FstLinkingEngine implements
         tags.clear(); //help the GC
     }
 
-    private int match(String text, Collection<Tag> tags) {
+    private int match(String text, Collection<Tag> tags, Map<int[],Set<String>> emTypes) {
         log.trace("  ... process matches for {} extracted Tags:",tags.size());
         int matchCount = 0;
         Iterator<Tag> tagIt = tags.iterator();
@@ -294,7 +319,20 @@ public class FstLinkingEngine implements
                     log.trace(" {}. {}", i++,  match.getUri());
                 }
                 matchCount++;
-                if(!filterEntityByType(match.getTypes().iterator())){
+                final boolean filterType;
+                if(linkingMode == LinkingModeEnum.NER){
+                    Set<String> types = emTypes.get(new int[]{tag.getStart(), tag.getEnd()});
+                    if(types == null){
+                        log.warn(" - missing NE types for Named Entity [{},{}] {}!",
+                            new Object[]{tag.getStart(), tag.getEnd(),tag.getAnchor()});
+                        filterType = true;
+                    } else {
+                        filterType = filterByNamedEntityType(match.getTypes().iterator(), types);
+                    }
+                } else {
+                    filterType = filterEntityByType(match.getTypes().iterator());
+                }
+                if(!filterType){
                     int distance = Integer.MAX_VALUE;
                     Literal matchLabel = null;
                     for(Iterator<Literal> it = match.getLabels().iterator(); it.hasNext() && distance > 0;){
@@ -370,6 +408,44 @@ public class FstLinkingEngine implements
         return matchCount;
     }
     /**
+     * Filter Entities based on matching the entity types with the named entity types.
+     * The {@link #neTypeMappings} are used to convert named entity types to 
+     * entity types. 
+     * @param eTypes the types of the entity
+     * @param neTypes the types of the named entity
+     * @return <code>true</code> if the entity is filtered (no type matches), <code>false</code> if it is kept
+     */
+    private boolean filterByNamedEntityType(Iterator<UriRef> eTypes, Set<String> neTypes) {
+        //first collect the allowed entity types
+        Set<String> entityTypes = new HashSet<String>();
+        for(String neType : neTypes){
+            if(neType != null){
+                Set<String> mappings = neTypeMappings.get(neType);
+                if(mappings != null){
+                    if(mappings.contains(null)){
+                        //found a wildcard (null) mapping for this named entity type
+                        return false; //do not filter
+                    } else {
+                        entityTypes.addAll(mappings);
+                    }
+                } //else no mapping for neType (tag or uri) present
+            }
+        }
+        if(entityTypes.isEmpty()){
+            return true; //no match possible .. filter
+        }
+        //second check the actual entity types against the allowed
+        while(eTypes.hasNext()){
+            UriRef typeUri = eTypes.next();
+            if(typeUri != null && entityTypes.contains(typeUri.getUnicodeString())){
+                return false; //we found a match .. do not filter
+            }
+        }
+        //no match found ... filter
+        return true;
+    }
+
+    /**
      * Applies the configured entity type based filters
      * @param entityTypes
      * @return
@@ -432,11 +508,23 @@ public class FstLinkingEngine implements
                 tokenStream = baseTokenStream;
                 reducer = TagClusterReducer.LONGEST_DOMINANT_RIGHT;
                 break;
-//            case NER:
+            case NER:
+                //this uses the NamedEntityTokenFilter as tokenStream and a
+                //combination with the longest dominant right as reducer 
+                NamedEntityTokenFilter neTokenFilter = new NamedEntityTokenFilter(
+                    baseTokenStream, at, session.getLanguage(), neTypeMappings.keySet(),
+                    session.entityMentionTypes);
+                tokenStream = neTokenFilter;
+                reducer = new ChainedTagClusterReducer(neTokenFilter,
+                    TagClusterReducer.LONGEST_DOMINANT_RIGHT);
+                break;
             case LINKABLE_TOKEN:
+                //this uses the LinkableTokenFilter as tokenStream
                 LinkableTokenFilter linkableTokenFilter = new LinkableTokenFilter(baseTokenStream, 
                     at, session.getLanguage(), tpConfig.getConfiguration(session.getLanguage()),
                     elConfig.getMinChunkMatchScore(), elConfig.getMinFoundTokens());
+                //NOTE that the  LinkableTokenFilter implements longest dominant right
+                // based on the matchable span of tags (instead of the whole span).
                 reducer = new ChainedTagClusterReducer(
                     linkableTokenFilter,TagClusterReducer.ALL);
                 tokenStream = linkableTokenFilter;
@@ -446,11 +534,9 @@ public class FstLinkingEngine implements
                     + linkingMode + "! Please adapt implementation to changed Enumeration!");
         }
         log.debug(" - tokenStream: {}", tokenStream);
-        log.debug(" - reducer: {}", reducer);
-        //we use two TagClusterReducer implementations.
-        // (1) the linkableTokenFilter filters all tags that do not overlap any
-        //     linkable Token
-        // (2) the LONGEST_DOMINANT_RIGHT reducer (TODO: make configurable)
+        log.debug(" - reducer: {} (class: {})", reducer, reducer.getClass().getName());
+        
+        //Now process the document
         final long[] time = new long[]{0};
         new Tagger(corpus.getFst(), tokenStream, reducer,session.isSkipAltTokens()) {
             

Modified: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineComponent.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineComponent.java?rev=1674012&r1=1674011&r2=1674012&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineComponent.java (original)
+++ stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineComponent.java Thu Apr 16 07:51:12 2015
@@ -36,9 +36,13 @@ import java.io.IOException;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
 import java.util.Dictionary;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Hashtable;
+import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
@@ -63,6 +67,7 @@ import org.apache.felix.scr.annotations.
 import org.apache.solr.client.solrj.SolrServer;
 import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
 import org.apache.solr.core.SolrCore;
+import org.apache.stanbol.commons.namespaceprefix.NamespaceMappingUtils;
 import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService;
 import org.apache.stanbol.commons.solr.IndexReference;
 import org.apache.stanbol.commons.solr.RegisteredSolrServerTracker;
@@ -70,6 +75,7 @@ import org.apache.stanbol.enhancer.engin
 import org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig;
 import org.apache.stanbol.enhancer.engines.lucenefstlinking.cache.EntityCacheManager;
 import org.apache.stanbol.enhancer.engines.lucenefstlinking.cache.FastLRUCacheManager;
+import org.apache.stanbol.enhancer.nlp.ner.NerTag;
 import org.apache.stanbol.enhancer.nlp.utils.LanguageConfiguration;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
@@ -132,35 +138,12 @@ import com.google.common.util.concurrent
     value=IndexConfiguration.DEFAULT_FST_FOLDER),
     @Property(name=IndexConfiguration.SOLR_TYPE_FIELD, value="rdf:type"),
     @Property(name=IndexConfiguration.SOLR_RANKING_FIELD, value="entityhub:entityRank"),
-//  @Property(name=REDIRECT_FIELD,value="rdfs:seeAlso"),
-//  @Property(name=REDIRECT_MODE,options={
-//      @PropertyOption(
-//          value='%'+REDIRECT_MODE+".option.ignore",
-//          name="IGNORE"),
-//      @PropertyOption(
-//          value='%'+REDIRECT_MODE+".option.addValues",
-//          name="ADD_VALUES"),
-//      @PropertyOption(
-//              value='%'+REDIRECT_MODE+".option.follow",
-//              name="FOLLOW")
-//      },value="IGNORE"),
     @Property(name=FstLinkingEngineComponent.FST_THREAD_POOL_SIZE,
         intValue=FstLinkingEngineComponent.DEFAULT_FST_THREAD_POOL_SIZE),
     @Property(name=FstLinkingEngineComponent.ENTITY_CACHE_SIZE, 
         intValue=FstLinkingEngineComponent.DEFAULT_ENTITY_CACHE_SIZE),
     @Property(name=SUGGESTIONS, intValue=DEFAULT_SUGGESTIONS),
     @Property(name=INCLUDE_SIMILAR_SCORE, boolValue=DEFAULT_INCLUDE_SIMILAR_SCORE),
-    @Property(name=FstLinkingEngineComponent.LINKING_MODE,  options={
-            @PropertyOption(
-                value='%'+FstLinkingEngineComponent.LINKING_MODE+".option.plain",
-                name="PLAIN"),
-            @PropertyOption(
-                value='%'+FstLinkingEngineComponent.LINKING_MODE+".option.linkableToken",
-                name="LINKABLE_TOKEN") //,
-            //@PropertyOption(
-            //    value='%'+FstLinkingEngineComponent.LINKING_MODE+".option.ner",
-            //    name="NER")
-        },value="LINKABLE_TOKEN"),
     @Property(name=CASE_SENSITIVE,boolValue=DEFAULT_CASE_SENSITIVE_MATCHING_STATE),
     @Property(name=PROCESS_ONLY_PROPER_NOUNS_STATE, boolValue=DEFAULT_PROCESS_ONLY_PROPER_NOUNS_STATE),
     @Property(name=PROCESSED_LANGUAGES, cardinality=Integer.MAX_VALUE,
@@ -178,9 +161,6 @@ import com.google.common.util.concurrent
         "dbp-ont:Event; schema:Event > dbp-ont:Event",
         "schema:Product > schema:Product",
         "skos:Concept > skos:Concept"}),
-//    @Property(name=DEREFERENCE_ENTITIES, boolValue=DEFAULT_DEREFERENCE_ENTITIES_STATE),
-//    @Property(name=DEREFERENCE_ENTITIES_FIELDS,cardinality=Integer.MAX_VALUE,
-//        value={"rdfs:comment","geo:lat","geo:long","foaf:depiction","dbp-ont:thumbnail"}),
     @Property(name=SERVICE_RANKING,intValue=0)
 })
 public class FstLinkingEngineComponent {
@@ -206,6 +186,13 @@ public class FstLinkingEngineComponent {
     public static final String LINKING_MODE = "enhancer.engines.linking.lucenefst.mode";
     
     /**
+     * Allows to configure mappings of NamedEntity Types to types of Entities in the
+     * vocabulary. Configured keys are matched against the {@link NerTag#getTag()} AND
+     * {@link NerTag#getType()} values of NamedEntities. Configured Values are mapped
+     * against the values of the configured {@link IndexConfiguration#SOLR_TYPE_FIELD}.
+     */
+    public static final String NAMED_ENTITY_TYPE_MAPPINGS = "enhancer.engines.linking.lucenefst.neTypeMapping";
+    /**
      * The size of the thread pool used to create FST models (default=1). Creating
      * such models does need a lot of memory. Expect values up to 10times of the
      * build model. So while this task can easily performed concurrently users need
@@ -242,7 +229,7 @@ public class FstLinkingEngineComponent {
      */
     private static final Integer FST_DEFAULT_MIN_FOUND_TOKENS = 2;
     
-    private final Logger log = LoggerFactory.getLogger(FstLinkingEngineComponent.class);
+    protected final Logger log = LoggerFactory.getLogger(FstLinkingEngineComponent.class);
     /**
      * the name for the EnhancementEngine registered by this component
      */
@@ -257,7 +244,7 @@ public class FstLinkingEngineComponent {
      * used to resolve '{prefix}:{local-name}' used within the engines configuration
      */
     @Reference(cardinality=ReferenceCardinality.OPTIONAL_UNARY)
-    protected NamespacePrefixService prefixService;    
+    private NamespacePrefixService prefixService;    
 
     /**
      * Holds the FST configuration parsed to the engine
@@ -322,7 +309,7 @@ public class FstLinkingEngineComponent {
      * The bundle context for this component. Also used to track dependencies
      * and register the {@link #engineRegistration}
      */
-    private BundleContext bundleContext;
+    protected BundleContext bundleContext;
     
     /**
      * Thread pool used for the runtime creation of FST modles.
@@ -355,6 +342,8 @@ public class FstLinkingEngineComponent {
      * The size of the EntityCache ( <code>0</code> ... means deactivated)
      */
     private int entityCacheSize;
+
+    private Map<String,Set<String>> nerTypeMappings;
     
     /**
      * Default constructor as used by OSGI. This expects that 
@@ -366,9 +355,63 @@ public class FstLinkingEngineComponent {
     @Activate
     @SuppressWarnings("unchecked")
     protected void activate(ComponentContext ctx) throws ConfigurationException {
-        log.info("activate {}",getClass().getSimpleName());
+        log.info("activate {}", getClass().getSimpleName());
+        log.debug("  - instance: {}", this);
+        log.debug("  - config: {}", ctx.getProperties());
         this.bundleContext = ctx.getBundleContext();
-        Dictionary<String,Object> properties = ctx.getProperties();
+        //(0) parse the linking mode
+        applyConfig(parseLinkingMode(ctx), ctx.getProperties(), prefixService);
+    }
+
+    /**
+     * Parses the LinkingMode from the {@link #LINKING_MODE} property. This
+     * allows to use this component to configure FST linking engines for any
+     * supported LinkingMode. If the {@link #LINKING_MODE} is not present (or blank)
+     * the default {@link LinkingModeEnum#LINKABLE_TOKEN} is returned. <p>
+     * <b>NOTE:</b> Typically
+     * users will want to use the <ul>
+     * <li>{@link PlainFstLinkingComponnet} to configure FST engines for the 
+     * {@link LinkingModeEnum#PLAIN}
+     * <li> {@link NamedEntityFstLinkingComponnet} to configure FST engines for
+     * the {@link LinkingModeEnum#NER}
+     * </ul>
+     * but it is also fine to explicitly specify a {@link #LINKING_MODE} linking
+     * mode when using this component to configure the FST linking engine.
+     * @param ctx the parsed component context
+     * @return the parsed {@link LinkingModeEnum}
+     * @throws ConfigurationException if the configured value is not a member of {@link LinkingModeEnum}
+     */
+    private LinkingModeEnum parseLinkingMode(ComponentContext ctx) throws ConfigurationException {
+        Object value = ctx.getProperties().get(LINKING_MODE);
+        LinkingModeEnum linkingMode;
+        if(value == null || StringUtils.isBlank(value.toString())){
+            linkingMode = LinkingModeEnum.LINKABLE_TOKEN;
+        } else {
+            try {
+                linkingMode = LinkingModeEnum.valueOf(value.toString());
+            } catch(IllegalArgumentException e){ //thrown by valueOf(..) for unknown names
+                throw new ConfigurationException(LINKING_MODE, "The parsed value '"
+                    +value+"' (type: "+value.getClass().getName()+") is not a member "
+                    + "of the enum (members: "+ Arrays.toString(LinkingModeEnum.values())
+                    + ")!",e);
+            }
+        }
+        return linkingMode;
+    }
+    /**
+     * Called by {@link #activate(ComponentContext)}, 
+     * {@link PlainFstLinkingComponnet#activate(ComponentContext)} and 
+     * {@link NamedEntityFstLinkingComponnet#activate(ComponentContext)} to
+     * apply the parsed {@link ComponentContext#getProperties()}. The
+     * {@link LinkingModeEnum linking mode} is parsed separately as OSGI does not
+     * allow to modify the parsed config and sub-classes do need to override
+     * the linking mode.
+     * @param linkingMode the linking mode
+     * @param properties
+     * @throws ConfigurationException
+     */
+    protected void applyConfig(LinkingModeEnum linkingMode, Dictionary<String,Object> properties, NamespacePrefixService prefixService)
+            throws ConfigurationException {
         //(0) The name for the Enhancement Engine and the basic metadata
         Object value = properties.get(PROPERTY_NAME);
         if(value == null || value.toString().isEmpty()){
@@ -381,21 +424,10 @@ public class FstLinkingEngineComponent {
         engineMetadata.put(PROPERTY_NAME, this.engineName);
         value = properties.get(Constants.SERVICE_RANKING);
         engineMetadata.put(Constants.SERVICE_RANKING, value == null ? Integer.valueOf(0) : value);
-        //(0) parse the linking mode
-        value = properties.get(LINKING_MODE);
-        if(value == null || StringUtils.isBlank(value.toString())){
-            this.linkingMode = LinkingModeEnum.LINKABLE_TOKEN;
-        } else {
-            try {
-                this.linkingMode = LinkingModeEnum.valueOf(value.toString());
-            } catch(IllegalArgumentException e){
-                throw new ConfigurationException(LINKING_MODE, "The parsed value '"
-                    +value+"' (type: "+value.getClass().getName()+") is not a member "
-                    + "of the enum (members: "+ Arrays.toString(LinkingModeEnum.values())
-                    + ")!",e);
-            }
-        }
-        log.info(" - linking mode: {}",linkingMode);
+        
+        //(0) set the linking mode
+        this.linkingMode = linkingMode;
+        log.info(" - linking mode: {}", linkingMode);
         
         //(1) parse the TextProcessing configuration
         //TODO: decide if we should use the TextProcessingConfig for this engine
@@ -561,8 +593,70 @@ public class FstLinkingEngineComponent {
         } else {
             solrRankingField = value.toString().trim();
         }
+        //(10) parse the NamedEntity type mappings (if linkingMode = NER). Config syntax:
+        //     '{namedEntity-tag-or-uri} > {entityType-1}[;{entityType-n}]' ('*' = wildcard)
+        if(linkingMode == LinkingModeEnum.NER){
+            nerTypeMappings = new HashMap<String,Set<String>>();
+            value = properties.get(NAMED_ENTITY_TYPE_MAPPINGS);
+            if(value instanceof String[]){ //support array
+                value = Arrays.asList((String[])value);
+            } else if(value instanceof String) { //single value
+                value = Collections.singleton(value);
+            }
+            if(value instanceof Collection<?>){ //and collection
+                log.info(" - process Named Entity Type Mappings (used by LinkingMode: {})",linkingMode);
+                String usage = "usage: '{namedEntity-tag-or-uri} > {entityType-1}[;{entityType-n}]'";
+                configs : for(Object o : (Iterable<?>)value){
+                    if(o != null){
+                        String[] config = o.toString().split(">"); //empty array for e.g. ">"
+                        String namedEntityType = config.length > 0 ? config[0].trim() : "";
+                        if(namedEntityType.isEmpty()){
+                            log.warn("Invalid Type Mapping Config '{}': Missing namedEntityType ({}) -> ignore this config",
+                                o,usage);
+                            continue configs;
+                        }
+                        if(NamespaceMappingUtils.getPrefix(namedEntityType) != null){
+                            namedEntityType = NamespaceMappingUtils.getConfiguredUri(
+                                prefixService, NAMED_ENTITY_TYPE_MAPPINGS,namedEntityType);
+                        }
+                        if(config.length < 2 || config[1].isEmpty()){
+                            log.warn("Invalid Type Mapping Config '{}': Missing dc:type URI ({}) -> ignore this config",
+                                o,usage);
+                            continue configs;
+                        }
+                        String entityTypes = config[1].trim();
+                        if(config.length > 2){
+                            log.warn("Configuration after 2nd '>' gets ignored. Will use mapping '{} > {}' from config {}",
+                                new Object[]{namedEntityType,entityTypes,o});
+                        }
+                        Set<String> types = nerTypeMappings.get(namedEntityType);
+                        if(types == null){ //add new element to the mapping
+                            types = new HashSet<String>();
+                            nerTypeMappings.put(namedEntityType, types);
+                        }
+                        for(String entityType : entityTypes.split(";")){
+                            entityType = entityType.trim();
+                            if(!entityType.isEmpty()){
+                                String typeUri;
+                                if("*".equals(entityType)){
+                                    typeUri = null; //null is used as wildcard
+                                } else {
+                                    typeUri = NamespaceMappingUtils.getConfiguredUri(
+                                        prefixService, NAMED_ENTITY_TYPE_MAPPINGS, entityType);
+                                }
+                                log.info("   - add {} > {}", namedEntityType, typeUri);
+                                types.add(typeUri);
+                            } //else ignore empty mapping
+                        }
+                    }
+                }
+            } else { //no mappings defined ... set wildcard mapping
+                log.info(" - No Named Entity type mappings configured. Will use wildcard mappings");
+                nerTypeMappings = Collections.singletonMap(null, Collections.<String>singleton(null));
+            }
+        }
         
-        //(10) start tracking the SolrCore
+        //(11) start tracking the SolrCore
         try {
             solrServerTracker = new RegisteredSolrServerTracker(
                 bundleContext, indexReference, null){
@@ -599,7 +693,18 @@ public class FstLinkingEngineComponent {
             throw new ConfigurationException(SOLR_CORE, "parsed SolrCore name '"
                 + value.toString()+"' is invalid (expected: '[{server-name}:]{indexname}'");
         }
-        solrServerTracker.open();
+        try {
+            solrServerTracker.open();
+        } catch(RuntimeException e){
+            //FIX for STANBOL-1416 (see https://issues.apache.org/jira/browse/STANBOL-1416)
+            //If an available SolrCore can not be correctly initialized we will
+            //get the exception here. In this case we want this component to be
+            //activated and waiting for further service events. Because of that
+            //the exception is caught (and only logged) here.
+            log.debug("Error while processing existing SolrCore Service during "
+                    + "opening SolrServiceTracker ... waiting for further service "
+                    + "Events", e);
+        }
     }
     
     /**
@@ -712,18 +817,28 @@ public class FstLinkingEngineComponent {
             } else {
             	log.info("  ... no corpus for default language {} available", defaultCoprous);
             }
-            //set the index configuration to the field;
+            
+            //check if the old configuration is still present
+            if(this.engineRegistration != null){
+                unregisterEngine();
+            }
+            
+            //create the new configuration
+            
+            //set the newly configured instances to the fields
             this.indexConfig = indexConfig;
+            this.solrServerReference = reference;
+            this.solrCore = core;
+            //create the new FST linking engine instance
             FstLinkingEngine engine = new FstLinkingEngine(engineName, 
                 linkingMode, indexConfig,
-                textProcessingConfig, entityLinkerConfig);
+                textProcessingConfig, entityLinkerConfig, nerTypeMappings);
+            //register it as a service
             String[] services = new String [] {
                     EnhancementEngine.class.getName(),
                     ServiceProperties.class.getName()};
             log.info(" ... register {}: {}", engine.getClass().getSimpleName(),engineName);
             this.engineRegistration = bundleContext.registerService(services,engine, engineMetadata);
-            this.solrServerReference = reference;
-            this.solrCore = core;
         }
 
         
@@ -765,12 +880,21 @@ public class FstLinkingEngineComponent {
      * rests the fields. If no engine is registered this does nothing!
      */
     private void unregisterEngine() {
+        log.debug("> in unregisterEngine() ...");
         //use local copies for method calls to avoid concurrency issues
         ServiceRegistration engineRegistration = this.engineRegistration;
         if(engineRegistration != null){
             log.info(" ... unregister Lucene FSTLinkingEngine {}",engineName);
-            engineRegistration.unregister();
+            try {
+                engineRegistration.unregister();
+            } catch(IllegalStateException e) {
+                //this is unexpected but can be ignored
+                log.info("Unexpected State: Service for FSTLinkingEngine "
+                        + engineName+" was already deactivated.", e);
+            }
             this.engineRegistration = null; //reset the field
+        } else {
+            log.debug(" ... no engine registration present");
         }
         solrServerReference = null;
         SolrCore solrServer = this.solrCore;
@@ -778,6 +902,8 @@ public class FstLinkingEngineComponent {
             log.debug(" ... unregister SolrCore {}", solrServer.getName());
             solrServer.close(); //decrease the reference count!!
             this.solrCore = null; //rest the field
+        } else {
+            log.debug(" ... no SolrCore present");
         }
         //deactivate the index configuration if present
         if(indexConfig != null){
@@ -790,6 +916,8 @@ public class FstLinkingEngineComponent {
                 cacheManager.close();
             }
             indexConfig = null;
+        } else {
+            log.debug(" ... no index config present");
         }
     }
 
@@ -834,7 +962,11 @@ public class FstLinkingEngineComponent {
      */
     @Deactivate
     protected void deactivate(ComponentContext ctx) {
-        log.info(" ... deactivate {}: {}",getClass().getSimpleName(), engineName);
+        log.info(" ... deactivate {}: {} (CompInst: {})",new Object[] {
+                getClass().getSimpleName(), 
+                engineName, ctx.getComponentInstance()});
+        log.debug("  - instance: {}", this);
+        log.debug("  - config: {}", ctx.getProperties());
         if(solrServerTracker != null){
             //closing the tracker will also cause registered engines to be
             //unregistered as service (see #updateEngineRegistration())

Modified: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/LinkableTokenFilter.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/LinkableTokenFilter.java?rev=1674012&r1=1674011&r2=1674012&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/LinkableTokenFilter.java (original)
+++ stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/LinkableTokenFilter.java Thu Apr 16 07:51:12 2015
@@ -57,12 +57,12 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * Class the ensures that only {@link TokenData#isLinkable linkable} Tokens
+ * Class that ensures that only {@link TokenData#isLinkable linkable} Tokens
  * are processed.<p>
  * This is ensured on two places:<ol>
  * <li> Classifies Tokens in the Solr {@link TokenStream} with the {@link TaggingAttribute}
  * based on NLP processing results present in the {@link AnalysedText}. This
- * implementation Classifies Token similar to the {@link EntityLinkingEngine}.
+ * implementation classifies Token similar to the {@link EntityLinkingEngine}.
  * It uses the {@link TextProcessingConfig} for its configuration.<p>
  * <li> Implements {@link TagClusterReducer} to ensure that all {@link TagLL tags}
  * that do not overlap with any {@link TokenData#isLinkable linkable} are

Modified: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/LinkingModeEnum.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/LinkingModeEnum.java?rev=1674012&r1=1674011&r2=1674012&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/LinkingModeEnum.java (original)
+++ stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/LinkingModeEnum.java Thu Apr 16 07:51:12 2015
@@ -1,3 +1,19 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
 package org.apache.stanbol.enhancer.engines.lucenefstlinking;
 
 import org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig;
@@ -15,10 +31,10 @@ public enum LinkingModeEnum {
      * or even only {@link Pos#ProperNoun} - depending on the 
      * {@link TextProcessingConfig} 
      */
-    LINKABLE_TOKEN //,
-//    /**
-//     * Only {@link NerTag}s are linked with the vocabualry
-//     */
-//    NER
+    LINKABLE_TOKEN,
+    /**
+     * Only {@link NerTag}s are linked with the vocabulary
+     */
+    NER
 
 }

Added: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityFstLinkingComponnet.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityFstLinkingComponnet.java?rev=1674012&view=auto
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityFstLinkingComponnet.java (added)
+++ stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityFstLinkingComponnet.java Thu Apr 16 07:51:12 2015
@@ -0,0 +1,118 @@
+package org.apache.stanbol.enhancer.engines.lucenefstlinking;
+
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.CASE_SENSITIVE;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_CASE_SENSITIVE_MATCHING_STATE;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_INCLUDE_SIMILAR_SCORE;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_MATCHING_LANGUAGE;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_SUGGESTIONS;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.INCLUDE_SIMILAR_SCORE;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.SUGGESTIONS;
+import static org.apache.stanbol.enhancer.servicesapi.EnhancementEngine.PROPERTY_NAME;
+import static org.osgi.framework.Constants.SERVICE_RANKING;
+
+import org.apache.felix.scr.annotations.Activate;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.ConfigurationPolicy;
+import org.apache.felix.scr.annotations.Deactivate;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.PropertyOption;
+import org.apache.felix.scr.annotations.Reference;
+import org.apache.felix.scr.annotations.ReferenceCardinality;
+import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService;
+import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
+import org.apache.stanbol.enhancer.nlp.ner.NerTag;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+
+/**
+ * OSGI component used to configure a {@link FstLinkingEngine} with
+ * {@link LinkingModeEnum#NER}. <p>
+ * <b>NOTE:</b> Using this Engine requires {@link NerTag}s to be present in the
+ * {@link AnalysedText} content part. In addition {@link NerTag#getTag()} and
+ * {@link NerTag#getType()} values need to be mapped to expected Entity types
+ * in the linked vocabulary. This is configured by using the
+ * {@link FstLinkingEngineComponent#NAMED_ENTITY_TYPE_MAPPINGS} property.
+ * <p>
+ * This class declares the component properties and hands the linking mode,
+ * the component properties and the (optional) {@link NamespacePrefixService}
+ * to the <code>applyConfig(..)</code> method of the
+ * {@link FstLinkingEngineComponent} super class.
+ *
+ * @author Rupert Westenthaler
+ *
+ */
+@Component(
+    configurationFactory = true, 
+    policy = ConfigurationPolicy.REQUIRE, // a configuration must be present (no 'baseUri' property is declared here)
+    specVersion = "1.1", 
+    metatype = true, 
+    immediate = true, 
+    inherit = false)
+@Properties(value={
+    @Property(name=PROPERTY_NAME), //the name of the engine
+    @Property(name=FstLinkingEngineComponent.SOLR_CORE),
+    @Property(name=IndexConfiguration.FIELD_ENCODING, options={
+        @PropertyOption(
+            value='%'+IndexConfiguration.FIELD_ENCODING+".option.none",
+            name="None"),
+        @PropertyOption(
+            value='%'+IndexConfiguration.FIELD_ENCODING+".option.solrYard",
+            name="SolrYard"),
+        @PropertyOption(
+            value='%'+IndexConfiguration.FIELD_ENCODING+".option.minusPrefix",
+            name="MinusPrefix"),
+        @PropertyOption(
+            value='%'+IndexConfiguration.FIELD_ENCODING+".option.underscorePrefix",
+            name="UnderscorePrefix"),
+        @PropertyOption(
+            value='%'+IndexConfiguration.FIELD_ENCODING+".option.minusSuffix",
+            name="MinusSuffix"),
+        @PropertyOption(
+            value='%'+IndexConfiguration.FIELD_ENCODING+".option.underscoreSuffix",
+            name="UnderscoreSuffix"),
+        @PropertyOption(
+            value='%'+IndexConfiguration.FIELD_ENCODING+".option.atPrefix",
+            name="AtPrefix"),
+        @PropertyOption(
+            value='%'+IndexConfiguration.FIELD_ENCODING+".option.atSuffix",
+            name="AtSuffix")
+        },value="SolrYard"),
+    @Property(name=IndexConfiguration.FST_CONFIG, cardinality=Integer.MAX_VALUE),
+    @Property(name=IndexConfiguration.FST_FOLDER, 
+    value=IndexConfiguration.DEFAULT_FST_FOLDER),
+    @Property(name=IndexConfiguration.SOLR_TYPE_FIELD, value="rdf:type"),
+    @Property(name=IndexConfiguration.SOLR_RANKING_FIELD, value="entityhub:entityRank"),
+    @Property(name=FstLinkingEngineComponent.FST_THREAD_POOL_SIZE,
+        intValue=FstLinkingEngineComponent.DEFAULT_FST_THREAD_POOL_SIZE),
+    @Property(name=FstLinkingEngineComponent.ENTITY_CACHE_SIZE, 
+        intValue=FstLinkingEngineComponent.DEFAULT_ENTITY_CACHE_SIZE),
+    @Property(name=SUGGESTIONS, intValue=DEFAULT_SUGGESTIONS),
+    @Property(name=INCLUDE_SIMILAR_SCORE, boolValue=DEFAULT_INCLUDE_SIMILAR_SCORE),
+    @Property(name=CASE_SENSITIVE,boolValue=DEFAULT_CASE_SENSITIVE_MATCHING_STATE),
+    @Property(name=DEFAULT_MATCHING_LANGUAGE,value=""),
+    // default mappings; the values read as
+    // '{named-entity-type} > {entity-type}; {entity-type}; ...'
+    // NOTE(review): format inferred from the values below - confirm against
+    // the parser of NAMED_ENTITY_TYPE_MAPPINGS in FstLinkingEngineComponent
+    @Property(name=FstLinkingEngineComponent.NAMED_ENTITY_TYPE_MAPPINGS, 
+        cardinality=Integer.MAX_VALUE, value={
+            "dbp-ont:Person > dbp-ont:Person; schema:Person; foaf:Person",
+            "dbp-ont:Organisation > dbp-ont:Organisation; dbp-ont:Newspaper; schema:Organization",
+            "dbp-ont:Place > dbp-ont:Place; schema:Place; geonames:Feature"}),
+    @Property(name=SERVICE_RANKING,intValue=0)
+})
+public class NamedEntityFstLinkingComponnet extends FstLinkingEngineComponent {
+
+    /**
+     * used to resolve '{prefix}:{local-name}' used within the engines configuration.
+     * Declared as optional reference, so this is presumably <code>null</code>
+     * if no such service is available.
+     */
+    @Reference(cardinality=ReferenceCardinality.OPTIONAL_UNARY)
+    private NamespacePrefixService prefixService;    
+
+    
+    /**
+     * Activates this component: stores the {@link org.osgi.framework.BundleContext}
+     * of the parsed context and applies the component properties by using
+     * {@link LinkingModeEnum#NER} as linking mode.
+     * @param ctx the component context providing the bundle context and the
+     * configuration properties
+     * @throws ConfigurationException declared by the overridden method; 
+     * presumably thrown by <code>applyConfig(..)</code> for invalid configurations
+     */
+    @Activate
+    @Override
+    protected void activate(ComponentContext ctx) throws ConfigurationException {
+        log.info("activate {}",getClass().getSimpleName());
+        this.bundleContext = ctx.getBundleContext();
+        super.applyConfig(LinkingModeEnum.NER, ctx.getProperties(), prefixService);
+    }
+    
+    /**
+     * Deactivates this component by delegating to the super class.
+     * NOTE(review): presumably only overridden so that the annotation is 
+     * present on this class (<code>inherit = false</code>) - confirm.
+     * @param ctx the component context
+     */
+    @Deactivate
+    @Override
+    protected void deactivate(ComponentContext ctx) {
+        super.deactivate(ctx);
+    }
+}

Propchange: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityFstLinkingComponnet.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityTokenFilter.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityTokenFilter.java?rev=1674012&view=auto
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityTokenFilter.java (added)
+++ stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityTokenFilter.java Thu Apr 16 07:51:12 2015
@@ -0,0 +1,259 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.stanbol.enhancer.engines.lucenefstlinking;
+
+import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.NER_ANNOTATION;
+
+import java.io.IOException;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.NavigableMap;
+import java.util.Set;
+
+import org.apache.commons.collections.Predicate;
+import org.apache.commons.collections.iterators.FilterIterator;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
+import org.apache.stanbol.enhancer.nlp.model.Chunk;
+import org.apache.stanbol.enhancer.nlp.model.Span;
+import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
+import org.apache.stanbol.enhancer.nlp.ner.NerTag;
+import org.opensextant.solrtexttagger.TagClusterReducer;
+import org.opensextant.solrtexttagger.TagLL;
+import org.opensextant.solrtexttagger.TaggingAttribute;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Class that ensures that only Tokens within a {@link Chunk} with a 
+ * {@link NerTag} are processed.<p>
+ * This is ensured on two places:<ol>
+ * <li> Classifies Tokens in the Solr {@link TokenStream} with the {@link TaggingAttribute}
+ * based on {@link NerTag}s present in the {@link AnalysedText}.<p>
+ * <li> Implements {@link TagClusterReducer} to ensure that all {@link TagLL tags}
+ * that do not cover the whole Named Entity are removed from the Cluster.
+ * </ol>
+ * <b> Implementation Details</b><p>
+ * The {@link TokenStream} implementation of this class does set
+ * <code>{@link TaggingAttribute#isTaggable()} == ture</code> if the do overlap 
+ * with a {@link Chunk} having an {@link NerTag}
+ * <p>
+ * The {@link TagClusterReducer} implementation keeps track of Chunks with 
+ * {@link NerTag} while iterating over the {@link TokenStream} and adds them to 
+ * the end of a List. When {@link TagClusterReducer#reduce(TagLL[])} is called 
+ * tags of the cluster are checked if they do cover Chunks with a {@link NerTag}.
+ * If they do not they are removed from the cluster.
+ * <p>
+ * This implementation was derived from the {@link LinkableTokenFilter}
+ * 
+ * @author Rupert Westenthaler
+ *
+ */
+public final class NamedEntityTokenFilter extends TokenFilter implements TagClusterReducer{
+
+    private final Logger log = LoggerFactory.getLogger(NamedEntityTokenFilter.class);
+    
+    /**
+     * The NLP processing results
+     */
+    private AnalysedText at;
+    /**
+     * The language of the text
+     */
+
+    /**
+     * Iterator over all {@link Chunk}s in the {@link AnalysedText} that do 
+     * have an {@link NerTag}
+     */
+    private Iterator<Chunk> neChunks;
+    
+    protected final CharTermAttribute termAtt;
+    protected final OffsetAttribute offset;
+    protected final TaggingAttribute taggable;
+    
+    private int lookupCount = 0;
+    private int incrementCount = 0;
+
+    /**
+     * List with {@link Chunk}s having {@link NerTag}s. This is used by
+     * the {@link #reduce(TagLL[])} method to check if {@link TagLL tags} 
+     * do cover Named Entities detected in the text.
+     */
+    private List<Chunk> nePhrases;
+
+    private final NavigableMap<int[],Set<String>> nePhrasesTypes;
+    
+    private Chunk neChunk;
+
+    protected final boolean wildcardType;
+
+    protected final Set<String> neTypes;
+
+    /**
+     * A Token Filter for Named Entities of the configured types. Also collects
+     * '<code>span -&gt type</code>' mappings for Named Entities.
+     * @param input the input token stream for the parsed text
+     * @param at the {@link AnalysedText} containing {@link NerTag} values
+     * @param lang the language of the text
+     * @param neTypes the string {@link NerTag#getType()} and {@link NerTag#getTag()}
+     * values of enabled Named Entities. If <code>null</code> or containing the
+     * <code>null</code> element all types will be accepted.
+     * @param nePhrasesTypes The {@link NavigableMap} used to store the spans of
+     * named entities as key and the set o their {@link NerTag#getTag()} and 
+     * {@link NerTag#getType()} as values. Those information are collected while
+     * iterating over the text (by the {@link NamedEntityPredicate}) and are
+     * used later for filtering {@link Match}es based on the type of the Entities.
+     * Typically the {@link TaggingSession#entityMentionTypes} is parsed as this
+     * parameter.
+     */
+    protected NamedEntityTokenFilter(TokenStream input, AnalysedText at, String lang,
+            Set<String> neTypes, NavigableMap<int[],Set<String>> nePhrasesTypes) {
+        super(input);
+        //STANBOL-1177: add attributes in doPrivileged to avoid 
+        //AccessControlException: access denied ("java.lang.RuntimePermission" "getClassLoader")
+        termAtt = AccessController.doPrivileged(new PrivilegedAction<CharTermAttribute>() {
+            @Override public CharTermAttribute run() {
+                return addAttribute(CharTermAttribute.class);
+            }});
+        offset = AccessController.doPrivileged(new PrivilegedAction<OffsetAttribute>() {
+            @Override public OffsetAttribute run() {
+                return addAttribute(OffsetAttribute.class);
+            }});
+        taggable = AccessController.doPrivileged(new PrivilegedAction<TaggingAttribute>() {
+            @Override public TaggingAttribute run() {
+                return addAttribute(TaggingAttribute.class);
+            }});
+        this.at = at;
+        this.wildcardType = neTypes == null || neTypes.contains(null);
+        this.neTypes = neTypes;
+        this.nePhrasesTypes = nePhrasesTypes;
+    }
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public void reset() throws IOException {
+        super.reset();
+        nePhrases = new LinkedList<Chunk>();
+        neChunks = new FilterIterator(at.getChunks(), new NamedEntityPredicate());
+    }
+    
+    @Override
+    public boolean incrementToken() throws IOException {
+        if(input.incrementToken()){
+            incrementCount++;
+            if(log.isTraceEnabled()){
+	            log.trace("> solr:[{},{}] {}",new Object[]{
+	                            offset.startOffset(), offset.endOffset(), termAtt});
+            }
+            while((neChunk == null || neChunk.getEnd() < offset.startOffset()) && neChunks.hasNext()){
+                neChunk = neChunks.next();
+                nePhrases.add(neChunk);
+            }
+            if(neChunk == null){
+                taggable.setTaggable(false);
+                incrementCount++;
+                log.debug("lookup percentage: {}",lookupCount*100/(float)incrementCount);
+                return false;
+            } else if(offset.endOffset() > neChunk.getStart() 
+                    || offset.startOffset() < neChunk.getEnd()){
+                //set tagable to true if the tokens overlapps with the current chunk
+                taggable.setTaggable(true);
+                if(log.isTraceEnabled()){
+                    log.trace("lookup: token [{},{}]: {} | named Entity [{},{}]:{}", 
+                        new Object[]{ offset.startOffset(), offset.endOffset(), 
+                            termAtt, neChunk.getStart(), neChunk.getEnd(),
+                            neChunk.getSpan()});
+                }
+                lookupCount++;
+            } else {
+                taggable.setTaggable(false);
+            }
+            incrementCount++;
+            return true;
+        } else { //no more tokens in the parent token stream
+            return false;
+        }
+    }
+
+    @Override
+    public void reduce(TagLL[] head) {
+        //(1) reduce Tags based on named entity phrases. 
+        for(TagLL tag = head[0]; tag != null; tag = tag.getNextTag()) {
+            int start = tag.getStartOffset();
+            int end = tag.getEndOffset();
+            Chunk nePhrase = nePhrases.isEmpty() ? null : nePhrases.get(0);
+            while(nePhrase != null && nePhrase.getEnd() <= start){
+                nePhrases.remove(0);
+                nePhrase = nePhrases.isEmpty() ? null : nePhrases.get(0);
+            }
+            if(nePhrase == null || !(start <= nePhrase.getStart() && end >= nePhrase.getEnd())){
+                //does not cover any named entity phrase
+                tag.removeLL(); //remove the tag from the cluster
+                if(log.isTraceEnabled()){
+                    log.trace(" > reduce tag {} - does not cover {}", tag, nePhrase);
+                }
+            } else if(log.isTraceEnabled()) {//the current Tag coveres a named entity phrase
+                log.trace(" > keep tag {} for {}", tag, nePhrase);
+            }
+        }
+    }
+        
+    /**
+     * {@link Predicate} used to select Named Entities based on matching 
+     * {@link NerTag#getTag()} and {@link NerTag#getType()} values against the
+     * {@link NamedEntityTokenFilter#neTypes} configuration. As a side effect
+     * this also collects the {@link NamedEntityTokenFilter#nePhrasesTypes}
+     * information. This avoids a 2nd pass over the {@link AnalysedText} to
+     * collect those information
+     * @author Rupert Westenthaler
+     *
+     */
+    final class NamedEntityPredicate implements Predicate {
+        @Override
+        public boolean evaluate(Object o) {
+            if(o instanceof Chunk){
+                Chunk chunk = (Chunk)o;
+                Value<NerTag> nerValue = chunk.getAnnotation(NER_ANNOTATION);
+                if(nerValue != null){
+                    NerTag nerTag = nerValue.value();
+                    String nerType = nerTag.getType() != null ? 
+                            nerTag.getType().getUnicodeString() : null;
+                    if( wildcardType || neTypes.contains(nerTag.getTag())
+                            || (nerType != null && neTypes.contains(nerType))){
+                        int[] span = new int[]{chunk.getStart(), chunk.getEnd()};
+                        Set<String> types = nePhrasesTypes.get(span);
+                        if(types == null){
+                            types = new HashSet<String>(4);
+                            nePhrasesTypes.put(span, types);
+                        }
+                        types.add(nerType);
+                        types.add(nerTag.getTag());
+                        return true;
+                    }
+                }
+            }
+            return false;
+        }
+    }
+}

Propchange: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityTokenFilter.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/PlainFstLinkingComponnet.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/PlainFstLinkingComponnet.java?rev=1674012&view=auto
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/PlainFstLinkingComponnet.java (added)
+++ stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/PlainFstLinkingComponnet.java Thu Apr 16 07:51:12 2015
@@ -0,0 +1,119 @@
+package org.apache.stanbol.enhancer.engines.lucenefstlinking;
+
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.CASE_SENSITIVE;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_CASE_SENSITIVE_MATCHING_STATE;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_INCLUDE_SIMILAR_SCORE;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_MATCHING_LANGUAGE;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_SUGGESTIONS;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.ENTITY_TYPES;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.INCLUDE_SIMILAR_SCORE;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.SUGGESTIONS;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.TYPE_MAPPINGS;
+import static org.apache.stanbol.enhancer.servicesapi.EnhancementEngine.PROPERTY_NAME;
+import static org.osgi.framework.Constants.SERVICE_RANKING;
+
+import org.apache.felix.scr.annotations.Activate;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.ConfigurationPolicy;
+import org.apache.felix.scr.annotations.Deactivate;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.PropertyOption;
+import org.apache.felix.scr.annotations.Reference;
+import org.apache.felix.scr.annotations.ReferenceCardinality;
+import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+
+/**
+ * OSGI component used to configure a {@link FstLinkingEngine} with
+ * {@link LinkingModeEnum#PLAIN}. <p>
+ * <b>NOTE:</b> In this configuration no NLP processing other than language
+ * detection is required for linking.
+ * <p>
+ * This class declares the component properties and hands the linking mode,
+ * the component properties and the (optional) {@link NamespacePrefixService}
+ * to the <code>applyConfig(..)</code> method of the
+ * {@link FstLinkingEngineComponent} super class.
+ *
+ * @author Rupert Westenthaler
+ *
+ */
+@Component(
+    configurationFactory = true, 
+    policy = ConfigurationPolicy.REQUIRE, // a configuration must be present (no 'baseUri' property is declared here)
+    specVersion = "1.1", 
+    metatype = true, 
+    immediate = true, 
+    inherit = false)
+@Properties(value={
+        @Property(name=PROPERTY_NAME), //the name of the engine
+        @Property(name=FstLinkingEngineComponent.SOLR_CORE),
+        @Property(name=IndexConfiguration.FIELD_ENCODING, options={
+            @PropertyOption(
+                value='%'+IndexConfiguration.FIELD_ENCODING+".option.none",
+                name="None"),
+            @PropertyOption(
+                value='%'+IndexConfiguration.FIELD_ENCODING+".option.solrYard",
+                name="SolrYard"),
+            @PropertyOption(
+                value='%'+IndexConfiguration.FIELD_ENCODING+".option.minusPrefix",
+                name="MinusPrefix"),
+            @PropertyOption(
+                value='%'+IndexConfiguration.FIELD_ENCODING+".option.underscorePrefix",
+                name="UnderscorePrefix"),
+            @PropertyOption(
+                value='%'+IndexConfiguration.FIELD_ENCODING+".option.minusSuffix",
+                name="MinusSuffix"),
+            @PropertyOption(
+                value='%'+IndexConfiguration.FIELD_ENCODING+".option.underscoreSuffix",
+                name="UnderscoreSuffix"),
+            @PropertyOption(
+                value='%'+IndexConfiguration.FIELD_ENCODING+".option.atPrefix",
+                name="AtPrefix"),
+            @PropertyOption(
+                value='%'+IndexConfiguration.FIELD_ENCODING+".option.atSuffix",
+                name="AtSuffix")
+            },value="SolrYard"),
+        @Property(name=IndexConfiguration.FST_CONFIG, cardinality=Integer.MAX_VALUE),
+        @Property(name=IndexConfiguration.FST_FOLDER, 
+        value=IndexConfiguration.DEFAULT_FST_FOLDER),
+        @Property(name=IndexConfiguration.SOLR_TYPE_FIELD, value="rdf:type"),
+        @Property(name=IndexConfiguration.SOLR_RANKING_FIELD, value="entityhub:entityRank"),
+        @Property(name=FstLinkingEngineComponent.FST_THREAD_POOL_SIZE,
+            intValue=FstLinkingEngineComponent.DEFAULT_FST_THREAD_POOL_SIZE),
+        @Property(name=FstLinkingEngineComponent.ENTITY_CACHE_SIZE, 
+            intValue=FstLinkingEngineComponent.DEFAULT_ENTITY_CACHE_SIZE),
+        @Property(name=SUGGESTIONS, intValue=DEFAULT_SUGGESTIONS),
+        @Property(name=INCLUDE_SIMILAR_SCORE, boolValue=DEFAULT_INCLUDE_SIMILAR_SCORE),
+        @Property(name=CASE_SENSITIVE,boolValue=DEFAULT_CASE_SENSITIVE_MATCHING_STATE),
+        @Property(name=DEFAULT_MATCHING_LANGUAGE,value=""),
+        @Property(name=ENTITY_TYPES,cardinality=Integer.MAX_VALUE),
+        // default mappings; the values read as
+        // '{entity-type}; {entity-type}; ... > {target-type}'
+        // NOTE(review): format inferred from the values below - confirm 
+        // against the parser of TYPE_MAPPINGS
+        @Property(name=TYPE_MAPPINGS,cardinality=Integer.MAX_VALUE, value={
+            "dbp-ont:Organisation; dbp-ont:Newspaper; schema:Organization > dbp-ont:Organisation",
+            "dbp-ont:Person; foaf:Person; schema:Person > dbp-ont:Person",
+            "dbp-ont:Place; schema:Place; geonames:Feature > dbp-ont:Place",
+            "dbp-ont:Work; schema:CreativeWork > dbp-ont:Work",
+            "dbp-ont:Event; schema:Event > dbp-ont:Event",
+            "schema:Product > schema:Product",
+            "skos:Concept > skos:Concept"}),
+        @Property(name=SERVICE_RANKING,intValue=0)
+    })
+public class PlainFstLinkingComponnet extends FstLinkingEngineComponent {
+
+    /**
+     * used to resolve '{prefix}:{local-name}' used within the engines configuration.
+     * Declared as optional reference, so this is presumably <code>null</code>
+     * if no such service is available.
+     */
+    @Reference(cardinality=ReferenceCardinality.OPTIONAL_UNARY)
+    private NamespacePrefixService prefixService;    
+
+    
+    /**
+     * Activates this component: stores the {@link org.osgi.framework.BundleContext}
+     * of the parsed context and applies the component properties by using
+     * {@link LinkingModeEnum#PLAIN} as linking mode.
+     * @param ctx the component context providing the bundle context and the
+     * configuration properties
+     * @throws ConfigurationException declared by the overridden method; 
+     * presumably thrown by <code>applyConfig(..)</code> for invalid configurations
+     */
+    @Activate
+    @Override
+    protected void activate(ComponentContext ctx) throws ConfigurationException {
+        log.info("activate {}",getClass().getSimpleName());
+        this.bundleContext = ctx.getBundleContext();
+        super.applyConfig(LinkingModeEnum.PLAIN, ctx.getProperties(), prefixService);
+    }
+    
+    /**
+     * Deactivates this component by delegating to the super class.
+     * NOTE(review): presumably only overridden so that the annotation is 
+     * present on this class (<code>inherit = false</code>) - confirm.
+     * @param ctx the component context
+     */
+    @Deactivate
+    @Override
+    protected void deactivate(ComponentContext ctx) {
+        super.deactivate(ctx);
+    }
+}

Propchange: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/PlainFstLinkingComponnet.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java?rev=1674012&r1=1674011&r2=1674012&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java (original)
+++ stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java Thu Apr 16 07:51:12 2015
@@ -24,11 +24,12 @@ import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.NavigableMap;
 import java.util.Set;
+import java.util.TreeMap;
 
 import org.apache.clerezza.rdf.core.Language;
 import org.apache.clerezza.rdf.core.Literal;
-import org.apache.clerezza.rdf.core.Resource;
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
 import org.apache.commons.lang.StringUtils;
@@ -40,24 +41,17 @@ import org.apache.lucene.document.String
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.queries.function.valuesource.IfFunction;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.util.RefCounted;
 import org.apache.stanbol.enhancer.engines.lucenefstlinking.Match.FieldLoader;
 import org.apache.stanbol.enhancer.engines.lucenefstlinking.Match.FieldType;
 import org.apache.stanbol.enhancer.engines.lucenefstlinking.cache.EntityCache;
-import org.apache.stanbol.enhancer.engines.lucenefstlinking.impl.ValueSourceAccessor;
-import org.apache.stanbol.enhancer.servicesapi.ContentItem;
-import org.apache.stanbol.enhancer.servicesapi.EngineException;
-import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.nlp.ner.NerTag;
 import org.opensextant.solrtexttagger.TaggerFstCorpus;
-import org.opensextant.solrtexttagger.UnsupportedTokenException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import com.google.common.eventbus.AllowConcurrentEvents;
-
 /**
  * Profile created based on the {@link IndexConfiguration} for processing a
  * parsed ContentItem. <p>
@@ -101,6 +95,18 @@ public class TaggingSession implements C
     protected final String typeField;
     protected final String redirectField;
     protected final String rankingField;
+    
+    /**
+     * Used in the {@link LinkingModeEnum#NER} to store the {@link NerTag#getTag()}
+     * and {@link NerTag#getType()} values for the span of the Named Entity.<p>
+     * This information is collected by the {@link NamedEntityTokenFilter} while
+     * iterating over the parsed text and is used in the processing of
+     * {@link Tag}s to filter Entities based on their types. <p>
+     * Not used in any linking mode other than <code>NER</code>
+     */
+    protected final NavigableMap<int[],Set<String>> entityMentionTypes = 
+            new TreeMap<int[],Set<String>>(Tag.SPAN_COMPARATOR);
+    
     private final RefCounted<SolrIndexSearcher> searcherRef;
     /**
      * Document Cache and session statistics for the cache

Modified: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1674012&r1=1674011&r2=1674012&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/resources/OSGI-INF/metatype/metatype.properties (original)
+++ stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/resources/OSGI-INF/metatype/metatype.properties Thu Apr 16 07:51:12 2015
@@ -25,9 +25,28 @@ one with the higher ranking will be used
 #Properties specific to the FST linking engine 
 #===============================================================================
 org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngineComponent.name=Apache \
-Stanbol Enhancer Engine: FST Linking
+Stanbol Enhancer Engine: FST Linking: Linkable Token
 org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngineComponent.description=Lucene \
-FST based Entity Linking Engine implementation.
+FST based Entity Linking Engine that looks up Linkable Tokens in the controlled vocabulary. \
+Typically Proper Nouns (or all Nouns) are considered as linkable. Also Noun Phrases are \
+used to ensure that single word matches are not matched for phrases in the text (e.g. that \
+"university" is not matched with "University of Munich" mentioned in the text).
+
+org.apache.stanbol.enhancer.engines.lucenefstlinking.NamedEntityFstLinkingComponnet.name=Apache \
+Stanbol Enhancer Engine: FST Linking: Named Entities
+org.apache.stanbol.enhancer.engines.lucenefstlinking.NamedEntityFstLinkingComponnet.description=Lucene \
+FST based Entity Linking Engine that looks up Named Entities recognized in the text in the \
+configured controlled vocabulary. This mode supports filtering possible matches in the \
+vocabulary based on the type detected for the Named Entity.
+
+org.apache.stanbol.enhancer.engines.lucenefstlinking.PlainFstLinkingComponnet.name=Apache \
+Stanbol Enhancer Engine: FST Linking: Plain
+org.apache.stanbol.enhancer.engines.lucenefstlinking.PlainFstLinkingComponnet.description=\
+Lucene FST based Entity Linking Engine that operates on the plain text. It does not use \
+(or require) any NLP processing results (other than language detection). The Query time \
+Lucene Analyzer is used to process the parsed text and every token is linked with the \
+controlled vocabulary.
+
 
 enhancer.engines.linking.lucenefst.solrcore.name=Solr Core
 enhancer.engines.linking.lucenefst.solrcore.description=The reference to the SolrCore. \
@@ -153,15 +172,23 @@ enhancer.engines.linking.entityTypes.nam
 enhancer.engines.linking.entityTypes.description=Allows to define a white/black list \
 based on the types of Entities. Use '!{uri}' for black listing and '{uri}' for white \
 listing. Include '*' to force white listing (e.g. to allow Entities without any type). \
-Rules are processed based on their oder. 
+Rules are processed based on their order. NOTE: Not used in the NER linking mode.
 
 enhancer.engines.linking.lucenefst.mode.name=Linking Mode
 enhancer.engines.linking.lucenefst.mode.description=The linking mode allows to switch the \
 operation mode of the FST linking engine: PLAIN will link every single word with the \
 vocabulary. No NLP processing is required in this mode; LINKABLE_TOKEN will use NLP \
 processing results to determine what tokens should be linked (typically all Nouns or \
-only ProperNouns - configurable via the TextProcessing configuration); 
-#finally the NER mode will only link Named Entities detected by a NER component.
+only ProperNouns - configurable via the TextProcessing configuration); \
+finally the NER mode will only link Named Entities detected by a NER component.
 enhancer.engines.linking.lucenefst.mode.option.plain=Plain
 enhancer.engines.linking.lucenefst.mode.option.linkableToken=Linkable Tokens
-#enhancer.engines.linking.lucenefst.mode.option.ner=NER (not yet implemented)
+enhancer.engines.linking.lucenefst.mode.option.ner=NER
+
+enhancer.engines.linking.lucenefst.neTypeMapping.name=Named Entity Type Mappings
+enhancer.engines.linking.lucenefst.neTypeMapping.description=Allows to map Named \
+Entity Tags and Types to Entity types. Syntax: {ne-type} > {entity-type-1}; {entity-type-2}. \
+(e.g. a mapping for the tag "Person" to the type schema:Person - "Person > http://schema.org/Person", \
+a second mapping for the type "dbpedia:Person" to person types of different ontologies \
+"dbpedia:Person > dbpedia:Person; schema:Person; foaf:Person"). \
+NOTE: Only used in the NER linking mode.

Modified: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/test/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineTest.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/test/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineTest.java?rev=1674012&r1=1674011&r2=1674012&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/test/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineTest.java (original)
+++ stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/test/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineTest.java Thu Apr 16 07:51:12 2015
@@ -301,7 +301,7 @@ public class FstLinkingEngineTest {
         elc.setMinFoundTokens(2);//this is assumed by this test
         elc.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);
         FstLinkingEngine engine = new FstLinkingEngine("proper-noun-linking", 
-            LinkingModeEnum.LINKABLE_TOKEN, fstConfig, tpc, elc);
+            LinkingModeEnum.LINKABLE_TOKEN, fstConfig, tpc, elc, null);
         processConentItem(engine);
         validateEnhancements(
             Arrays.asList(
@@ -322,7 +322,7 @@ public class FstLinkingEngineTest {
         elc.setMinFoundTokens(2);//this is assumed by this test
         elc.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);
         FstLinkingEngine engine = new FstLinkingEngine("proper-noun-linking", 
-            LinkingModeEnum.LINKABLE_TOKEN, fstConfig, tpc, elc);
+            LinkingModeEnum.LINKABLE_TOKEN, fstConfig, tpc, elc, null);
         processConentItem(engine);
         validateEnhancements(
             Arrays.asList(