You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2015/04/16 09:51:12 UTC
svn commit: r1674012 - in
/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src:
main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/
main/resources/OSGI-INF/metatype/
test/java/org/apache/stanbol/enhancer/engines/lucenefst...
Author: rwesten
Date: Thu Apr 16 07:51:12 2015
New Revision: 1674012
URL: http://svn.apache.org/r1674012
Log:
Fix for STANBOL-1416 and implementation of STANBOL-1418. Also greatly improved the DEBUG-level logging of the FstLinkingEngineComponent. For detailed information see the issues.
Added:
stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityFstLinkingComponnet.java (with props)
stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityTokenFilter.java (with props)
stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/PlainFstLinkingComponnet.java (with props)
Modified:
stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngine.java
stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineComponent.java
stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/LinkableTokenFilter.java
stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/LinkingModeEnum.java
stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java
stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/resources/OSGI-INF/metatype/metatype.properties
stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/test/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineTest.java
Modified: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngine.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngine.java?rev=1674012&r1=1674011&r2=1674012&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngine.java (original)
+++ stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngine.java Thu Apr 16 07:51:12 2015
@@ -28,6 +28,7 @@ import static org.apache.stanbol.enhance
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
@@ -62,8 +63,8 @@ import org.apache.stanbol.enhancer.engin
import org.apache.stanbol.enhancer.engines.lucenefstlinking.TaggingSession.Corpus;
import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.nlp.model.AnalysedTextUtils;
+import org.apache.stanbol.enhancer.nlp.ner.NerTag;
import org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper;
-import org.apache.stanbol.enhancer.servicesapi.Blob;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
@@ -101,12 +102,23 @@ public class FstLinkingEngine implements
protected final TextProcessingConfig tpConfig;
protected final EntityLinkerConfig elConfig;
+
+ /**
+ * Used in the {@link LinkingModeEnum#NER} to filter entities. For that configured
+ * mappings for the {@link NerTag#getType()} and {@link NerTag#getTag()} values
+ * (the key) are mapped with the actual {@link Match#getTypes()} (the value set).
+ * The <code>null</code> value is interpreted as a wildcard (any type matches). An
+ * empty mapping is interpreted as a blacklist (do not look up Named Entities
+ * with that {@link NerTag#getType() type}/{@link NerTag#getTag() tag}).
+ */
+ protected final Map<String,Set<String>> neTypeMappings;
private IndexConfiguration indexConfig;
public FstLinkingEngine(String name, LinkingModeEnum linkingMode,
IndexConfiguration indexConfig,
- TextProcessingConfig tpConfig, EntityLinkerConfig elConfig) {
+ TextProcessingConfig tpConfig, EntityLinkerConfig elConfig,
+ Map<String,Set<String>> neTypeMappings) {
if (StringUtils.isBlank(name)) {
throw new IllegalArgumentException("The parsed name MUST NOT be NULL nor blank!");
}
@@ -124,6 +136,11 @@ public class FstLinkingEngine implements
throw new IllegalArgumentException("The parsed Entity Linking configuration MUST NOT be NULL");
}
this.elConfig = elConfig;
+ if(linkingMode == LinkingModeEnum.NER && neTypeMappings == null){
+ throw new IllegalArgumentException("The NamedEntity type mappings MUST NOT be NULL "
+ + "if the LinkingMode is NER!");
+ }
+ this.neTypeMappings = neTypeMappings;
}
@Override
@@ -155,9 +172,17 @@ public class FstLinkingEngine implements
}
// we need a detected language, the AnalyzedText contentPart with
// Tokens.
- AnalysedText at = getAnalysedText(this, ci, false);
- if(at == null && linkingMode == LinkingModeEnum.PLAIN){
- return NlpEngineHelper.getPlainText(this, ci, false) != null ? ENHANCE_ASYNC : CANNOT_ENHANCE;
+ AnalysedText at = AnalysedTextUtils.getAnalysedText(ci);
+ if(at == null){
+ if( linkingMode == LinkingModeEnum.PLAIN){
+ return NlpEngineHelper.getPlainText(this, ci, false) != null ? ENHANCE_ASYNC : CANNOT_ENHANCE;
+ } else {
+ log.warn("Unable to process {} with engine name={} and mode={} "
+ + ": Missing AnalyzedText content part. Please ensure that "
+ + "NLP processing results are available before FST linking!",
+ new Object[]{ci,name,linkingMode});
+ return CANNOT_ENHANCE;
+ }
} else {
if(linkingMode == LinkingModeEnum.PLAIN){
return ENHANCE_ASYNC;
@@ -167,7 +192,7 @@ public class FstLinkingEngine implements
log.warn("Unable to process {} with engine name={} and mode={} "
+ "as the AnalyzedText does not contain any Tokens!",
new Object[]{ci,name,linkingMode});
- return at.getTokens().hasNext() ? ENHANCE_ASYNC : CANNOT_ENHANCE;
+ return CANNOT_ENHANCE;
}
}
}
@@ -243,7 +268,7 @@ public class FstLinkingEngine implements
log.debug(" - sum fst: {} ms", taggingEnd - taggingStart);
}
}
- int matches = match(content,tags.values());
+ int matches = match(content, tags.values(), session.entityMentionTypes);
log.debug(" - loaded {} ({} loaded, {} cached, {} appended) Matches in {} ms",
new Object[]{matches, session.getSessionDocLoaded(),
session.getSessionDocCached(), session.getSessionDocAppended(),
@@ -273,7 +298,7 @@ public class FstLinkingEngine implements
tags.clear(); //help the GC
}
- private int match(String text, Collection<Tag> tags) {
+ private int match(String text, Collection<Tag> tags, Map<int[],Set<String>> emTypes) {
log.trace(" ... process matches for {} extracted Tags:",tags.size());
int matchCount = 0;
Iterator<Tag> tagIt = tags.iterator();
@@ -294,7 +319,20 @@ public class FstLinkingEngine implements
log.trace(" {}. {}", i++, match.getUri());
}
matchCount++;
- if(!filterEntityByType(match.getTypes().iterator())){
+ final boolean filterType;
+ if(linkingMode == LinkingModeEnum.NER){
+ Set<String> types = emTypes.get(new int[]{tag.getStart(), tag.getEnd()});
+ if(types == null){
+ log.warn(" - missing NE types for Named Entity [{},{}] {}!",
+ new Object[]{tag.getStart(), tag.getEnd(),tag.getAnchor()});
+ filterType = true;
+ } else {
+ filterType = filterByNamedEntityType(match.getTypes().iterator(), types);
+ }
+ } else {
+ filterType = filterEntityByType(match.getTypes().iterator());
+ }
+ if(!filterType){
int distance = Integer.MAX_VALUE;
Literal matchLabel = null;
for(Iterator<Literal> it = match.getLabels().iterator(); it.hasNext() && distance > 0;){
@@ -370,6 +408,44 @@ public class FstLinkingEngine implements
return matchCount;
}
/**
+ * Filter Entities based on matching the entity types with the named entity types.
+ * The {@link #neTypeMappings} are used to convert named entity types to
+ * entity types.
+ * @param eTypes the types of the entity
+ * @param neTypes the types of the named entity
+ * @return
+ */
+ private boolean filterByNamedEntityType(Iterator<UriRef> eTypes, Set<String> neTypes) {
+ //first collect the allowed entity types
+ Set<String> entityTypes = new HashSet<String>();
+ for(String neType : neTypes){
+ if(neType != null){
+ Set<String> mappings = neTypeMappings.get(neType);
+ if(mappings != null){
+ if(mappings.contains(null)){
+ //found a wildcard
+ return false; //do not filter
+ } else {
+ entityTypes.addAll(mappings);
+ }
+ } //else no mapping for neType (tag or uri) present
+ }
+ }
+ if(entityTypes.isEmpty()){
+ return true; //no match possible .. filter
+ }
+ //second check the actual entity types against the allowed
+ while(eTypes.hasNext()){
+ UriRef typeUri = eTypes.next();
+ if(typeUri != null && entityTypes.contains(typeUri.getUnicodeString())){
+ return false; //we found an match .. do not filter
+ }
+ }
+ //no match found ... filter
+ return true;
+ }
+
+ /**
* Applies the configured entity type based filters
* @param entityTypes
* @return
@@ -432,11 +508,23 @@ public class FstLinkingEngine implements
tokenStream = baseTokenStream;
reducer = TagClusterReducer.LONGEST_DOMINANT_RIGHT;
break;
-// case NER:
+ case NER:
+ //this uses the NamedEntityTokenFilter as tokenStream and a
+ //combination with the longest dominant right as reducer
+ NamedEntityTokenFilter neTokenFilter = new NamedEntityTokenFilter(
+ baseTokenStream, at, session.getLanguage(), neTypeMappings.keySet(),
+ session.entityMentionTypes);
+ tokenStream = neTokenFilter;
+ reducer = new ChainedTagClusterReducer(neTokenFilter,
+ TagClusterReducer.LONGEST_DOMINANT_RIGHT);
+ break;
case LINKABLE_TOKEN:
+ //this uses the LinkableTokenFilter as tokenStream
LinkableTokenFilter linkableTokenFilter = new LinkableTokenFilter(baseTokenStream,
at, session.getLanguage(), tpConfig.getConfiguration(session.getLanguage()),
elConfig.getMinChunkMatchScore(), elConfig.getMinFoundTokens());
+ //NOTE that the LinkableTokenFilter implements longest dominant right
+ // based on the matchable span of tags (instead of the whole span).
reducer = new ChainedTagClusterReducer(
linkableTokenFilter,TagClusterReducer.ALL);
tokenStream = linkableTokenFilter;
@@ -446,11 +534,9 @@ public class FstLinkingEngine implements
+ linkingMode + "! Please adapt implementation to changed Enumeration!");
}
log.debug(" - tokenStream: {}", tokenStream);
- log.debug(" - reducer: {}", reducer);
- //we use two TagClusterReducer implementations.
- // (1) the linkableTokenFilter filters all tags that do not overlap any
- // linkable Token
- // (2) the LONGEST_DOMINANT_RIGHT reducer (TODO: make configurable)
+ log.debug(" - reducer: {} (class: {})", reducer, reducer.getClass().getName());
+
+ //Now process the document
final long[] time = new long[]{0};
new Tagger(corpus.getFst(), tokenStream, reducer,session.isSkipAltTokens()) {
Modified: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineComponent.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineComponent.java?rev=1674012&r1=1674011&r2=1674012&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineComponent.java (original)
+++ stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineComponent.java Thu Apr 16 07:51:12 2015
@@ -36,9 +36,13 @@ import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
import java.util.Dictionary;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.Hashtable;
+import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -63,6 +67,7 @@ import org.apache.felix.scr.annotations.
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.core.SolrCore;
+import org.apache.stanbol.commons.namespaceprefix.NamespaceMappingUtils;
import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService;
import org.apache.stanbol.commons.solr.IndexReference;
import org.apache.stanbol.commons.solr.RegisteredSolrServerTracker;
@@ -70,6 +75,7 @@ import org.apache.stanbol.enhancer.engin
import org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig;
import org.apache.stanbol.enhancer.engines.lucenefstlinking.cache.EntityCacheManager;
import org.apache.stanbol.enhancer.engines.lucenefstlinking.cache.FastLRUCacheManager;
+import org.apache.stanbol.enhancer.nlp.ner.NerTag;
import org.apache.stanbol.enhancer.nlp.utils.LanguageConfiguration;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
@@ -132,35 +138,12 @@ import com.google.common.util.concurrent
value=IndexConfiguration.DEFAULT_FST_FOLDER),
@Property(name=IndexConfiguration.SOLR_TYPE_FIELD, value="rdf:type"),
@Property(name=IndexConfiguration.SOLR_RANKING_FIELD, value="entityhub:entityRank"),
-// @Property(name=REDIRECT_FIELD,value="rdfs:seeAlso"),
-// @Property(name=REDIRECT_MODE,options={
-// @PropertyOption(
-// value='%'+REDIRECT_MODE+".option.ignore",
-// name="IGNORE"),
-// @PropertyOption(
-// value='%'+REDIRECT_MODE+".option.addValues",
-// name="ADD_VALUES"),
-// @PropertyOption(
-// value='%'+REDIRECT_MODE+".option.follow",
-// name="FOLLOW")
-// },value="IGNORE"),
@Property(name=FstLinkingEngineComponent.FST_THREAD_POOL_SIZE,
intValue=FstLinkingEngineComponent.DEFAULT_FST_THREAD_POOL_SIZE),
@Property(name=FstLinkingEngineComponent.ENTITY_CACHE_SIZE,
intValue=FstLinkingEngineComponent.DEFAULT_ENTITY_CACHE_SIZE),
@Property(name=SUGGESTIONS, intValue=DEFAULT_SUGGESTIONS),
@Property(name=INCLUDE_SIMILAR_SCORE, boolValue=DEFAULT_INCLUDE_SIMILAR_SCORE),
- @Property(name=FstLinkingEngineComponent.LINKING_MODE, options={
- @PropertyOption(
- value='%'+FstLinkingEngineComponent.LINKING_MODE+".option.plain",
- name="PLAIN"),
- @PropertyOption(
- value='%'+FstLinkingEngineComponent.LINKING_MODE+".option.linkableToken",
- name="LINKABLE_TOKEN") //,
- //@PropertyOption(
- // value='%'+FstLinkingEngineComponent.LINKING_MODE+".option.ner",
- // name="NER")
- },value="LINKABLE_TOKEN"),
@Property(name=CASE_SENSITIVE,boolValue=DEFAULT_CASE_SENSITIVE_MATCHING_STATE),
@Property(name=PROCESS_ONLY_PROPER_NOUNS_STATE, boolValue=DEFAULT_PROCESS_ONLY_PROPER_NOUNS_STATE),
@Property(name=PROCESSED_LANGUAGES, cardinality=Integer.MAX_VALUE,
@@ -178,9 +161,6 @@ import com.google.common.util.concurrent
"dbp-ont:Event; schema:Event > dbp-ont:Event",
"schema:Product > schema:Product",
"skos:Concept > skos:Concept"}),
-// @Property(name=DEREFERENCE_ENTITIES, boolValue=DEFAULT_DEREFERENCE_ENTITIES_STATE),
-// @Property(name=DEREFERENCE_ENTITIES_FIELDS,cardinality=Integer.MAX_VALUE,
-// value={"rdfs:comment","geo:lat","geo:long","foaf:depiction","dbp-ont:thumbnail"}),
@Property(name=SERVICE_RANKING,intValue=0)
})
public class FstLinkingEngineComponent {
@@ -206,6 +186,13 @@ public class FstLinkingEngineComponent {
public static final String LINKING_MODE = "enhancer.engines.linking.lucenefst.mode";
/**
+ * Allows to configure mappings of NamedEntity Types to types of Entities in the
+ * vocabulary. Configured keys are matched against the {@link NerTag#getTag()} AND
+ * {@link NerTag#getType()} values of NamedEntities. Configured Values are mapped
+ * against the values of the configured {@link IndexConfiguration#SOLR_TYPE_FIELD}.
+ */
+ public static final String NAMED_ENTITY_TYPE_MAPPINGS = "enhancer.engines.linking.lucenefst.neTypeMapping";
+ /**
* The size of the thread pool used to create FST models (default=1). Creating
* such models does need a lot of memory. Expect values up to 10 times of the
* built model. So while this task can easily be performed concurrently users need
@@ -242,7 +229,7 @@ public class FstLinkingEngineComponent {
*/
private static final Integer FST_DEFAULT_MIN_FOUND_TOKENS = 2;
- private final Logger log = LoggerFactory.getLogger(FstLinkingEngineComponent.class);
+ protected final Logger log = LoggerFactory.getLogger(FstLinkingEngineComponent.class);
/**
* the name for the EnhancementEngine registered by this component
*/
@@ -257,7 +244,7 @@ public class FstLinkingEngineComponent {
* used to resolve '{prefix}:{local-name}' used within the engines configuration
*/
@Reference(cardinality=ReferenceCardinality.OPTIONAL_UNARY)
- protected NamespacePrefixService prefixService;
+ private NamespacePrefixService prefixService;
/**
* Holds the FST configuration parsed to the engine
@@ -322,7 +309,7 @@ public class FstLinkingEngineComponent {
* The bundle context for this component. Also used to track dependencies
* and register the {@link #engineRegistration}
*/
- private BundleContext bundleContext;
+ protected BundleContext bundleContext;
/**
* Thread pool used for the runtime creation of FST models.
@@ -355,6 +342,8 @@ public class FstLinkingEngineComponent {
* The size of the EntityCache ( <code>0</code> ... means deactivated)
*/
private int entityCacheSize;
+
+ private Map<String,Set<String>> nerTypeMappings;
/**
* Default constructor as used by OSGI. This expects that
@@ -366,9 +355,63 @@ public class FstLinkingEngineComponent {
@Activate
@SuppressWarnings("unchecked")
protected void activate(ComponentContext ctx) throws ConfigurationException {
- log.info("activate {}",getClass().getSimpleName());
+ log.info("activate {}", getClass().getSimpleName());
+ log.debug(" - instance: {}", this);
+ log.debug(" - config: {}", ctx.getProperties());
this.bundleContext = ctx.getBundleContext();
- Dictionary<String,Object> properties = ctx.getProperties();
+ //(0) parse the linking mode
+ applyConfig(parseLinkingMode(ctx), ctx.getProperties(), prefixService);
+ }
+
+ /**
+ * Parses the LinkingMode from the {@link #LINKING_MODE} property. This
+ * allows to use this component to configure FST linking engines for any
+ * supported LinkingMode. If the {@link #LINKING_MODE} is not present the
+ * default {@link LinkingModeEnum#LINKABLE_TOKEN} is returned. <p>
+ * <b>NOTE:</b>Typically
+ * users will want to use the <ul>
+ * <li>{@link PlainFstLinkingComponnet} to configure FST engines for the
+ * {@link LinkingModeEnum#PLAIN}
+ * <li> {@link NamedEntityFstLinkingComponnet} to configure FST engines for
+ * the {@link LinkingModeEnum#NER}
+ * </ul>
+ * but it is also fine to explicitly specify a {@link #LINKING_MODE} linking
+ * mode when using this component to configure the FST linking engine.
+ * @param ctx the parsed component context
+ * @return the parsed {@link LinkingModeEnum}
+ * @throws ConfigurationException
+ */
+ private LinkingModeEnum parseLinkingMode(ComponentContext ctx) throws ConfigurationException {
+ Object value = ctx.getProperties().get(LINKING_MODE);
+ LinkingModeEnum linkingMode;
+ if(value == null || StringUtils.isBlank(value.toString())){
+ linkingMode = LinkingModeEnum.LINKABLE_TOKEN;
+ } else {
+ try {
+ linkingMode = LinkingModeEnum.valueOf(value.toString());
+ } catch(IllegalArgumentException e){
+ throw new ConfigurationException(LINKING_MODE, "The parsed value '"
+ +value+"' (type: "+value.getClass().getName()+") is not a member "
+ + "of the enum (members: "+ Arrays.toString(LinkingModeEnum.values())
+ + ")!",e);
+ }
+ }
+ return linkingMode;
+ }
+ /**
+ * Called by {@link #activate(ComponentContext)},
+ * {@link PlainFstLinkingComponnet#activate(ComponentContext)} and
+ * {@link NamedEntityFstLinkingComponnet#activate(ComponentContext)} to
+ * apply the parsed {@link ComponentContext#getProperties()}. The
+ * {@link LinkingModeEnum linking mode} is parsed separately as OSGI does not
+ * allow to modify the parsed config and sub-classes do need to override
+ * the linking mode.
+ * @param linkingMode the linking mode
+ * @param properties
+ * @throws ConfigurationException
+ */
+ protected void applyConfig(LinkingModeEnum linkingMode, Dictionary<String,Object> properties, NamespacePrefixService prefixService)
+ throws ConfigurationException {
//(0) The name for the Enhancement Engine and the basic metadata
Object value = properties.get(PROPERTY_NAME);
if(value == null || value.toString().isEmpty()){
@@ -381,21 +424,10 @@ public class FstLinkingEngineComponent {
engineMetadata.put(PROPERTY_NAME, this.engineName);
value = properties.get(Constants.SERVICE_RANKING);
engineMetadata.put(Constants.SERVICE_RANKING, value == null ? Integer.valueOf(0) : value);
- //(0) parse the linking mode
- value = properties.get(LINKING_MODE);
- if(value == null || StringUtils.isBlank(value.toString())){
- this.linkingMode = LinkingModeEnum.LINKABLE_TOKEN;
- } else {
- try {
- this.linkingMode = LinkingModeEnum.valueOf(value.toString());
- } catch(IllegalArgumentException e){
- throw new ConfigurationException(LINKING_MODE, "The parsed value '"
- +value+"' (type: "+value.getClass().getName()+") is not a member "
- + "of the enum (members: "+ Arrays.toString(LinkingModeEnum.values())
- + ")!",e);
- }
- }
- log.info(" - linking mode: {}",linkingMode);
+
+ //(0) set the linking mode
+ this.linkingMode = linkingMode;
+ log.info(" - linking mode: {}", linkingMode);
//(1) parse the TextProcessing configuration
//TODO: decide if we should use the TextProcessingConfig for this engine
@@ -561,8 +593,70 @@ public class FstLinkingEngineComponent {
} else {
solrRankingField = value.toString().trim();
}
+ //(10) parse the NamedEntity type mappings (if linkingMode = NER)
+ if(linkingMode == LinkingModeEnum.NER){
+ nerTypeMappings = new HashMap<String,Set<String>>();
+ value = properties.get(NAMED_ENTITY_TYPE_MAPPINGS);
+ if(value instanceof String[]){ //support array
+ value = Arrays.asList((String[])value);
+ } else if(value instanceof String) { //single value
+ value = Collections.singleton(value);
+ }
+ if(value instanceof Collection<?>){ //and collection
+ log.info(" - process Named Entity Type Mappings (used by LinkingMode: {})",linkingMode);
+ configs : for(Object o : (Iterable<?>)value){
+ if(o != null){
+ StringBuilder usage = new StringBuilder("useage: ");
+ usage.append("'{namedEntity-tag-or-uri} > {entityType-1}[,{entityType-n}]'");
+ String[] config = o.toString().split(">");
+ String namedEntityType = config[0].trim();
+ if(namedEntityType.isEmpty()){
+ log.warn("Invalid Type Mapping Config '{}': Missing namedEntityType ({}) -> ignore this config",
+ o,usage);
+ continue configs;
+ }
+ if(NamespaceMappingUtils.getPrefix(namedEntityType) != null){
+ namedEntityType = NamespaceMappingUtils.getConfiguredUri(
+ prefixService, NAMED_ENTITY_TYPE_MAPPINGS,namedEntityType);
+ }
+ if(config.length < 2 || config[1].isEmpty()){
+ log.warn("Invalid Type Mapping Config '{}': Missing dc:type URI '{}' ({}) -> ignore this config",
+ o,usage);
+ continue configs;
+ }
+ String entityTypes = config[1].trim();
+ if(config.length > 2){
+ log.warn("Configuration after 2nd '>' gets ignored. Will use mapping '{} > {}' from config {}",
+ new Object[]{namedEntityType,entityTypes,o});
+ }
+ Set<String> types = nerTypeMappings.get(namedEntityType);
+ if(types == null){ //add new element to the mapping
+ types = new HashSet<String>();
+ nerTypeMappings.put(namedEntityType, types);
+ }
+ for(String entityType : entityTypes.split(";")){
+ entityType = entityType.trim();
+ if(!entityType.isEmpty()){
+ String typeUri;
+ if("*".equals(entityType)){
+ typeUri = null; //null is used as wildcard
+ } else {
+ typeUri = NamespaceMappingUtils.getConfiguredUri(
+ prefixService, NAMED_ENTITY_TYPE_MAPPINGS, entityType);
+ }
+ log.info(" - add {} > {}", namedEntityType, typeUri);
+ types.add(typeUri);
+ } //else ignore empty mapping
+ }
+ }
+ }
+ } else { //no mappings defined ... set wildcard mapping
+ log.info(" - No Named Entity type mappings configured. Will use wildcard mappings");
+ nerTypeMappings = Collections.singletonMap(null, Collections.<String>singleton(null));
+ }
+ }
- //(10) start tracking the SolrCore
+ //(11) start tracking the SolrCore
try {
solrServerTracker = new RegisteredSolrServerTracker(
bundleContext, indexReference, null){
@@ -599,7 +693,18 @@ public class FstLinkingEngineComponent {
throw new ConfigurationException(SOLR_CORE, "parsed SolrCore name '"
+ value.toString()+"' is invalid (expected: '[{server-name}:]{indexname}'");
}
- solrServerTracker.open();
+ try {
+ solrServerTracker.open();
+ } catch(RuntimeException e){
+ //FIX for STANBOL-1416 (see https://issues.apache.org/jira/browse/STANBOL-1416)
+ //If an available SolrCore can not be correctly initialized we will
+ //get the exception here. In this case we want this component to be
+ //activated and waiting for further service events. Because of that
+ //we catch here the exception.
+ log.debug("Error while processing existing SolrCore Service during "
+ + "opening SolrServiceTracker ... waiting for further service"
+ + "Events", e);
+ }
}
/**
@@ -712,18 +817,28 @@ public class FstLinkingEngineComponent {
} else {
log.info(" ... no corpus for default language {} available", defaultCoprous);
}
- //set the index configuration to the field;
+
+ //check if the old configuration is still present
+ if(this.engineRegistration != null){
+ unregisterEngine();
+ }
+
+ //create the new configuration
+
+ //set the newly configured instances to the fields
this.indexConfig = indexConfig;
+ this.solrServerReference = reference;
+ this.solrCore = core;
+ //create the new FST linking engine instance
FstLinkingEngine engine = new FstLinkingEngine(engineName,
linkingMode, indexConfig,
- textProcessingConfig, entityLinkerConfig);
+ textProcessingConfig, entityLinkerConfig, nerTypeMappings);
+ //register it as a service
String[] services = new String [] {
EnhancementEngine.class.getName(),
ServiceProperties.class.getName()};
log.info(" ... register {}: {}", engine.getClass().getSimpleName(),engineName);
this.engineRegistration = bundleContext.registerService(services,engine, engineMetadata);
- this.solrServerReference = reference;
- this.solrCore = core;
}
@@ -765,12 +880,21 @@ public class FstLinkingEngineComponent {
* resets the fields. If no engine is registered this does nothing!
*/
private void unregisterEngine() {
+ log.debug("> in unregisterEngine() ...");
//use local copies for method calls to avoid concurrency issues
ServiceRegistration engineRegistration = this.engineRegistration;
if(engineRegistration != null){
log.info(" ... unregister Lucene FSTLinkingEngine {}",engineName);
- engineRegistration.unregister();
+ try {
+ engineRegistration.unregister();
+ } catch(IllegalStateException e) {
+ //this is unexpected but can be ignored
+ log.info("Unexpected State: Service for FSTLinkingEngine "
+ + engineName+" was already deactivated.", e);
+ }
this.engineRegistration = null; //reset the field
+ } else {
+ log.debug(" ... no engine registration present");
}
solrServerReference = null;
SolrCore solrServer = this.solrCore;
@@ -778,6 +902,8 @@ public class FstLinkingEngineComponent {
log.debug(" ... unregister SolrCore {}", solrServer.getName());
solrServer.close(); //decrease the reference count!!
this.solrCore = null; //rest the field
+ } else {
+ log.debug(" ... no SolrCore present");
}
//deactivate the index configuration if present
if(indexConfig != null){
@@ -790,6 +916,8 @@ public class FstLinkingEngineComponent {
cacheManager.close();
}
indexConfig = null;
+ } else {
+ log.debug(" ... no index config present");
}
}
@@ -834,7 +962,11 @@ public class FstLinkingEngineComponent {
*/
@Deactivate
protected void deactivate(ComponentContext ctx) {
- log.info(" ... deactivate {}: {}",getClass().getSimpleName(), engineName);
+ log.info(" ... deactivate {}: {} (CompInst: {})",new Object[] {
+ getClass().getSimpleName(),
+ engineName, ctx.getComponentInstance()});
+ log.debug(" - instance: {}", this);
+ log.debug(" - config: {}", ctx.getProperties());
if(solrServerTracker != null){
//closing the tracker will also cause registered engines to be
//unregistered as service (see #updateEngineRegistration())
Modified: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/LinkableTokenFilter.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/LinkableTokenFilter.java?rev=1674012&r1=1674011&r2=1674012&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/LinkableTokenFilter.java (original)
+++ stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/LinkableTokenFilter.java Thu Apr 16 07:51:12 2015
@@ -57,12 +57,12 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * Class the ensures that only {@link TokenData#isLinkable linkable} Tokens
+ * Class that ensures that only {@link TokenData#isLinkable linkable} Tokens
* are processed.<p>
* This is ensured on two places:<ol>
* <li> Classifies Tokens in the Solr {@link TokenStream} with the {@link TaggingAttribute}
* based on NLP processing results present in the {@link AnalysedText}. This
- * implementation Classifies Token similar to the {@link EntityLinkingEngine}.
+ * implementation classifies Token similar to the {@link EntityLinkingEngine}.
* It uses the {@link TextProcessingConfig} for its configuration.<p>
* <li> Implements {@link TagClusterReducer} to ensure that all {@link TagLL tags}
* that do not overlap with any {@link TokenData#isLinkable linkable} are
Modified: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/LinkingModeEnum.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/LinkingModeEnum.java?rev=1674012&r1=1674011&r2=1674012&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/LinkingModeEnum.java (original)
+++ stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/LinkingModeEnum.java Thu Apr 16 07:51:12 2015
@@ -1,3 +1,19 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
package org.apache.stanbol.enhancer.engines.lucenefstlinking;
import org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig;
@@ -15,10 +31,10 @@ public enum LinkingModeEnum {
* or even only {@link Pos#ProperNoun} - depending on the
* {@link TextProcessingConfig}
*/
- LINKABLE_TOKEN //,
-// /**
-// * Only {@link NerTag}s are linked with the vocabualry
-// */
-// NER
+ LINKABLE_TOKEN,
+ /**
+ * Only {@link NerTag}s are linked with the vocabulary
+ */
+ NER
}
Added: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityFstLinkingComponnet.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityFstLinkingComponnet.java?rev=1674012&view=auto
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityFstLinkingComponnet.java (added)
+++ stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityFstLinkingComponnet.java Thu Apr 16 07:51:12 2015
@@ -0,0 +1,118 @@
+package org.apache.stanbol.enhancer.engines.lucenefstlinking;
+
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.CASE_SENSITIVE;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_CASE_SENSITIVE_MATCHING_STATE;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_INCLUDE_SIMILAR_SCORE;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_MATCHING_LANGUAGE;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_SUGGESTIONS;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.INCLUDE_SIMILAR_SCORE;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.SUGGESTIONS;
+import static org.apache.stanbol.enhancer.servicesapi.EnhancementEngine.PROPERTY_NAME;
+import static org.osgi.framework.Constants.SERVICE_RANKING;
+
+import org.apache.felix.scr.annotations.Activate;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.ConfigurationPolicy;
+import org.apache.felix.scr.annotations.Deactivate;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.PropertyOption;
+import org.apache.felix.scr.annotations.Reference;
+import org.apache.felix.scr.annotations.ReferenceCardinality;
+import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService;
+import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
+import org.apache.stanbol.enhancer.nlp.ner.NerTag;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+
+/**
+ * OSGI component used to configure a {@link FstLinkingEngine} with
+ * {@link LinkingModeEnum#NER}. <p>
+ * <b>NOTE:</b> Using this Engine requires {@link NerTag}s to be present in the
+ * {@link AnalysedText} content part. In addition {@link NerTag#getTag()} and
+ * {@link NerTag#getType()} values need to be mapped to expected Entity types
+ * in the linked vocabulary. This is configured by using the
+ * {@link FstLinkingEngineComponent#NAMED_ENTITY_TYPE_MAPPINGS} property.
+ *
+ * @author Rupert Westenthaler
+ *
+ */
+@Component(
+ configurationFactory = true,
+ policy = ConfigurationPolicy.REQUIRE, // the baseUri is required!
+ specVersion = "1.1",
+ metatype = true,
+ immediate = true,
+ inherit = false)
+@Properties(value={
+ @Property(name=PROPERTY_NAME), //the name of the engine
+ @Property(name=FstLinkingEngineComponent.SOLR_CORE),
+ @Property(name=IndexConfiguration.FIELD_ENCODING, options={
+ @PropertyOption(
+ value='%'+IndexConfiguration.FIELD_ENCODING+".option.none",
+ name="None"),
+ @PropertyOption(
+ value='%'+IndexConfiguration.FIELD_ENCODING+".option.solrYard",
+ name="SolrYard"),
+ @PropertyOption(
+ value='%'+IndexConfiguration.FIELD_ENCODING+".option.minusPrefix",
+ name="MinusPrefix"),
+ @PropertyOption(
+ value='%'+IndexConfiguration.FIELD_ENCODING+".option.underscorePrefix",
+ name="UnderscorePrefix"),
+ @PropertyOption(
+ value='%'+IndexConfiguration.FIELD_ENCODING+".option.minusSuffix",
+ name="MinusSuffix"),
+ @PropertyOption(
+ value='%'+IndexConfiguration.FIELD_ENCODING+".option.underscoreSuffix",
+ name="UnderscoreSuffix"),
+ @PropertyOption(
+ value='%'+IndexConfiguration.FIELD_ENCODING+".option.atPrefix",
+ name="AtPrefix"),
+ @PropertyOption(
+ value='%'+IndexConfiguration.FIELD_ENCODING+".option.atSuffix",
+ name="AtSuffix")
+ },value="SolrYard"),
+ @Property(name=IndexConfiguration.FST_CONFIG, cardinality=Integer.MAX_VALUE),
+ @Property(name=IndexConfiguration.FST_FOLDER,
+ value=IndexConfiguration.DEFAULT_FST_FOLDER),
+ @Property(name=IndexConfiguration.SOLR_TYPE_FIELD, value="rdf:type"),
+ @Property(name=IndexConfiguration.SOLR_RANKING_FIELD, value="entityhub:entityRank"),
+ @Property(name=FstLinkingEngineComponent.FST_THREAD_POOL_SIZE,
+ intValue=FstLinkingEngineComponent.DEFAULT_FST_THREAD_POOL_SIZE),
+ @Property(name=FstLinkingEngineComponent.ENTITY_CACHE_SIZE,
+ intValue=FstLinkingEngineComponent.DEFAULT_ENTITY_CACHE_SIZE),
+ @Property(name=SUGGESTIONS, intValue=DEFAULT_SUGGESTIONS),
+ @Property(name=INCLUDE_SIMILAR_SCORE, boolValue=DEFAULT_INCLUDE_SIMILAR_SCORE),
+ @Property(name=CASE_SENSITIVE,boolValue=DEFAULT_CASE_SENSITIVE_MATCHING_STATE),
+ @Property(name=DEFAULT_MATCHING_LANGUAGE,value=""),
+ @Property(name=FstLinkingEngineComponent.NAMED_ENTITY_TYPE_MAPPINGS,
+ cardinality=Integer.MAX_VALUE, value={
+ "dbp-ont:Person > dbp-ont:Person; schema:Person; foaf:Person",
+ "dbp-ont:Organisation > dbp-ont:Organisation; dbp-ont:Newspaper; schema:Organization",
+ "dbp-ont:Place > dbp-ont:Place; schema:Place; geonames:Feature"}),
+ @Property(name=SERVICE_RANKING,intValue=0)
+})
+public class NamedEntityFstLinkingComponnet extends FstLinkingEngineComponent {
+
+ /**
+ * used to resolve '{prefix}:{local-name}' used within the engines configuration
+ */
+ @Reference(cardinality=ReferenceCardinality.OPTIONAL_UNARY)
+ private NamespacePrefixService prefixService;
+
+
+ @Activate
+ @Override
+ protected void activate(ComponentContext ctx) throws ConfigurationException {
+ log.info("activate {}",getClass().getSimpleName());
+ this.bundleContext = ctx.getBundleContext();
+ super.applyConfig(LinkingModeEnum.NER, ctx.getProperties(), prefixService);
+ }
+
+ @Deactivate
+ @Override
+ protected void deactivate(ComponentContext ctx) {
+ super.deactivate(ctx);
+ }
+}
Propchange: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityFstLinkingComponnet.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityTokenFilter.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityTokenFilter.java?rev=1674012&view=auto
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityTokenFilter.java (added)
+++ stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityTokenFilter.java Thu Apr 16 07:51:12 2015
@@ -0,0 +1,259 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.stanbol.enhancer.engines.lucenefstlinking;
+
+import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.NER_ANNOTATION;
+
+import java.io.IOException;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.NavigableMap;
+import java.util.Set;
+
+import org.apache.commons.collections.Predicate;
+import org.apache.commons.collections.iterators.FilterIterator;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
+import org.apache.stanbol.enhancer.nlp.model.Chunk;
+import org.apache.stanbol.enhancer.nlp.model.Span;
+import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
+import org.apache.stanbol.enhancer.nlp.ner.NerTag;
+import org.opensextant.solrtexttagger.TagClusterReducer;
+import org.opensextant.solrtexttagger.TagLL;
+import org.opensextant.solrtexttagger.TaggingAttribute;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Class that ensures that only Tokens within a {@link Chunk} with a
+ * {@link NerTag} are processed.<p>
+ * This is ensured on two places:<ol>
+ * <li> Classifies Tokens in the Solr {@link TokenStream} with the {@link TaggingAttribute}
+ * based on {@link NerTag}s present in the {@link AnalysedText}.<p>
+ * <li> Implements {@link TagClusterReducer} to ensure that all {@link TagLL tags}
+ * that do not cover the whole Named Entity are removed from the Cluster.
+ * </ol>
+ * <b> Implementation Details</b><p>
+ * The {@link TokenStream} implementation of this class does set
+ * <code>{@link TaggingAttribute#isTaggable()} == true</code> for tokens that do overlap
+ * with a {@link Chunk} having an {@link NerTag}.
+ * <p>
+ * The {@link TagClusterReducer} implementation keeps track of Chunks with
+ * {@link NerTag} while iterating over the {@link TokenStream} and adds them to
+ * the end of a List. When {@link TagClusterReducer#reduce(TagLL[])} is called
+ * tags of the cluster are checked if they do cover Chunks with a {@link NerTag}.
+ * If they do not they are removed from the cluster.
+ * <p>
+ * This implementation was derived from the {@link LinkableTokenFilter}
+ *
+ * @author Rupert Westenthaler
+ *
+ */
+public final class NamedEntityTokenFilter extends TokenFilter implements TagClusterReducer{
+
+ private final Logger log = LoggerFactory.getLogger(NamedEntityTokenFilter.class);
+
+ /**
+ * The NLP processing results
+ */
+ private AnalysedText at;
+ /**
+ * The language of the text
+ */
+
+ /**
+ * Iterator over all {@link Chunk}s in the {@link AnalysedText} that do
+ * have an {@link NerTag}
+ */
+ private Iterator<Chunk> neChunks;
+
+ protected final CharTermAttribute termAtt;
+ protected final OffsetAttribute offset;
+ protected final TaggingAttribute taggable;
+
+ private int lookupCount = 0;
+ private int incrementCount = 0;
+
+ /**
+ * List with {@link Chunk}s having {@link NerTag}s. This is used by
+ * the {@link #reduce(TagLL[])} method to check if {@link TagLL tags}
+ * do cover Named Entities detected in the text.
+ */
+ private List<Chunk> nePhrases;
+
+ private final NavigableMap<int[],Set<String>> nePhrasesTypes;
+
+ private Chunk neChunk;
+
+ protected final boolean wildcardType;
+
+ protected final Set<String> neTypes;
+
+ /**
+ * A Token Filter for Named Entities of the configured types. Also collects
+ * '<code>span -> type</code>' mappings for Named Entities.
+ * @param input the input token stream for the parsed text
+ * @param at the {@link AnalysedText} containing {@link NerTag} values
+ * @param lang the language of the text
+ * @param neTypes the string {@link NerTag#getType()} and {@link NerTag#getTag()}
+ * values of enabled Named Entities. If <code>null</code> or containing the
+ * <code>null</code> element all types will be accepted.
+ * @param nePhrasesTypes The {@link NavigableMap} used to store the spans of
+ * named entities as key and the set o their {@link NerTag#getTag()} and
+ * {@link NerTag#getType()} as values. Those information are collected while
+ * iterating over the text (by the {@link NamedEntityPredicate}) and are
+ * used later for filtering {@link Match}es based on the type of the Entities.
+ * Typically the {@link TaggingSession#entityMentionTypes} is parsed as this
+ * parameter.
+ */
+ protected NamedEntityTokenFilter(TokenStream input, AnalysedText at, String lang,
+ Set<String> neTypes, NavigableMap<int[],Set<String>> nePhrasesTypes) {
+ super(input);
+ //STANBOL-1177: add attributes in doPrivileged to avoid
+ //AccessControlException: access denied ("java.lang.RuntimePermission" "getClassLoader")
+ termAtt = AccessController.doPrivileged(new PrivilegedAction<CharTermAttribute>() {
+ @Override public CharTermAttribute run() {
+ return addAttribute(CharTermAttribute.class);
+ }});
+ offset = AccessController.doPrivileged(new PrivilegedAction<OffsetAttribute>() {
+ @Override public OffsetAttribute run() {
+ return addAttribute(OffsetAttribute.class);
+ }});
+ taggable = AccessController.doPrivileged(new PrivilegedAction<TaggingAttribute>() {
+ @Override public TaggingAttribute run() {
+ return addAttribute(TaggingAttribute.class);
+ }});
+ this.at = at;
+ this.wildcardType = neTypes == null || neTypes.contains(null);
+ this.neTypes = neTypes;
+ this.nePhrasesTypes = nePhrasesTypes;
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public void reset() throws IOException {
+ super.reset();
+ nePhrases = new LinkedList<Chunk>();
+ neChunks = new FilterIterator(at.getChunks(), new NamedEntityPredicate());
+ }
+
+ @Override
+ public boolean incrementToken() throws IOException {
+ if(input.incrementToken()){
+ incrementCount++;
+ if(log.isTraceEnabled()){
+ log.trace("> solr:[{},{}] {}",new Object[]{
+ offset.startOffset(), offset.endOffset(), termAtt});
+ }
+ while((neChunk == null || neChunk.getEnd() < offset.startOffset()) && neChunks.hasNext()){
+ neChunk = neChunks.next();
+ nePhrases.add(neChunk);
+ }
+ if(neChunk == null){
+ taggable.setTaggable(false);
+ incrementCount++;
+ log.debug("lookup percentage: {}",lookupCount*100/(float)incrementCount);
+ return false;
+ } else if(offset.endOffset() > neChunk.getStart()
+ || offset.startOffset() < neChunk.getEnd()){
+ //set tagable to true if the tokens overlapps with the current chunk
+ taggable.setTaggable(true);
+ if(log.isTraceEnabled()){
+ log.trace("lookup: token [{},{}]: {} | named Entity [{},{}]:{}",
+ new Object[]{ offset.startOffset(), offset.endOffset(),
+ termAtt, neChunk.getStart(), neChunk.getEnd(),
+ neChunk.getSpan()});
+ }
+ lookupCount++;
+ } else {
+ taggable.setTaggable(false);
+ }
+ incrementCount++;
+ return true;
+ } else { //no more tokens in the parent token stream
+ return false;
+ }
+ }
+
+ @Override
+ public void reduce(TagLL[] head) {
+ //(1) reduce Tags based on named entity phrases.
+ for(TagLL tag = head[0]; tag != null; tag = tag.getNextTag()) {
+ int start = tag.getStartOffset();
+ int end = tag.getEndOffset();
+ Chunk nePhrase = nePhrases.isEmpty() ? null : nePhrases.get(0);
+ while(nePhrase != null && nePhrase.getEnd() <= start){
+ nePhrases.remove(0);
+ nePhrase = nePhrases.isEmpty() ? null : nePhrases.get(0);
+ }
+ if(nePhrase == null || !(start <= nePhrase.getStart() && end >= nePhrase.getEnd())){
+ //does not cover any named entity phrase
+ tag.removeLL(); //remove the tag from the cluster
+ if(log.isTraceEnabled()){
+ log.trace(" > reduce tag {} - does not cover {}", tag, nePhrase);
+ }
+ } else if(log.isTraceEnabled()) {//the current Tag coveres a named entity phrase
+ log.trace(" > keep tag {} for {}", tag, nePhrase);
+ }
+ }
+ }
+
+ /**
+ * {@link Predicate} used to select Named Entities based on matching
+ * {@link NerTag#getTag()} and {@link NerTag#getType()} values against the
+ * {@link NamedEntityTokenFilter#neTypes} configuration. As a side effect
+ * this also collects the {@link NamedEntityTokenFilter#nePhrasesTypes}
+ * information. This avoids a 2nd pass over the {@link AnalysedText} to
+ * collect those information
+ * @author Rupert Westenthaler
+ *
+ */
+ final class NamedEntityPredicate implements Predicate {
+ @Override
+ public boolean evaluate(Object o) {
+ if(o instanceof Chunk){
+ Chunk chunk = (Chunk)o;
+ Value<NerTag> nerValue = chunk.getAnnotation(NER_ANNOTATION);
+ if(nerValue != null){
+ NerTag nerTag = nerValue.value();
+ String nerType = nerTag.getType() != null ?
+ nerTag.getType().getUnicodeString() : null;
+ if( wildcardType || neTypes.contains(nerTag.getTag())
+ || (nerType != null && neTypes.contains(nerType))){
+ int[] span = new int[]{chunk.getStart(), chunk.getEnd()};
+ Set<String> types = nePhrasesTypes.get(span);
+ if(types == null){
+ types = new HashSet<String>(4);
+ nePhrasesTypes.put(span, types);
+ }
+ types.add(nerType);
+ types.add(nerTag.getTag());
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+ }
+}
Propchange: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/NamedEntityTokenFilter.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/PlainFstLinkingComponnet.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/PlainFstLinkingComponnet.java?rev=1674012&view=auto
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/PlainFstLinkingComponnet.java (added)
+++ stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/PlainFstLinkingComponnet.java Thu Apr 16 07:51:12 2015
@@ -0,0 +1,119 @@
+package org.apache.stanbol.enhancer.engines.lucenefstlinking;
+
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.CASE_SENSITIVE;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_CASE_SENSITIVE_MATCHING_STATE;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_INCLUDE_SIMILAR_SCORE;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_MATCHING_LANGUAGE;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_SUGGESTIONS;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.ENTITY_TYPES;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.INCLUDE_SIMILAR_SCORE;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.SUGGESTIONS;
+import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.TYPE_MAPPINGS;
+import static org.apache.stanbol.enhancer.servicesapi.EnhancementEngine.PROPERTY_NAME;
+import static org.osgi.framework.Constants.SERVICE_RANKING;
+
+import org.apache.felix.scr.annotations.Activate;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.ConfigurationPolicy;
+import org.apache.felix.scr.annotations.Deactivate;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.PropertyOption;
+import org.apache.felix.scr.annotations.Reference;
+import org.apache.felix.scr.annotations.ReferenceCardinality;
+import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+
+/**
+ * OSGI component used to configure a {@link FstLinkingEngine} with
+ * {@link LinkingModeEnum#PLAIN}. <p>
+ * <b>NOTE:</b> In this configuration no NLP processing other than language
+ * detection is required for linking.
+ *
+ * @author Rupert Westenthaler
+ *
+ */
+@Component(
+ configurationFactory = true,
+ policy = ConfigurationPolicy.REQUIRE, // the baseUri is required!
+ specVersion = "1.1",
+ metatype = true,
+ immediate = true,
+ inherit = false)
+@Properties(value={
+ @Property(name=PROPERTY_NAME), //the name of the engine
+ @Property(name=FstLinkingEngineComponent.SOLR_CORE),
+ @Property(name=IndexConfiguration.FIELD_ENCODING, options={
+ @PropertyOption(
+ value='%'+IndexConfiguration.FIELD_ENCODING+".option.none",
+ name="None"),
+ @PropertyOption(
+ value='%'+IndexConfiguration.FIELD_ENCODING+".option.solrYard",
+ name="SolrYard"),
+ @PropertyOption(
+ value='%'+IndexConfiguration.FIELD_ENCODING+".option.minusPrefix",
+ name="MinusPrefix"),
+ @PropertyOption(
+ value='%'+IndexConfiguration.FIELD_ENCODING+".option.underscorePrefix",
+ name="UnderscorePrefix"),
+ @PropertyOption(
+ value='%'+IndexConfiguration.FIELD_ENCODING+".option.minusSuffix",
+ name="MinusSuffix"),
+ @PropertyOption(
+ value='%'+IndexConfiguration.FIELD_ENCODING+".option.underscoreSuffix",
+ name="UnderscoreSuffix"),
+ @PropertyOption(
+ value='%'+IndexConfiguration.FIELD_ENCODING+".option.atPrefix",
+ name="AtPrefix"),
+ @PropertyOption(
+ value='%'+IndexConfiguration.FIELD_ENCODING+".option.atSuffix",
+ name="AtSuffix")
+ },value="SolrYard"),
+ @Property(name=IndexConfiguration.FST_CONFIG, cardinality=Integer.MAX_VALUE),
+ @Property(name=IndexConfiguration.FST_FOLDER,
+ value=IndexConfiguration.DEFAULT_FST_FOLDER),
+ @Property(name=IndexConfiguration.SOLR_TYPE_FIELD, value="rdf:type"),
+ @Property(name=IndexConfiguration.SOLR_RANKING_FIELD, value="entityhub:entityRank"),
+ @Property(name=FstLinkingEngineComponent.FST_THREAD_POOL_SIZE,
+ intValue=FstLinkingEngineComponent.DEFAULT_FST_THREAD_POOL_SIZE),
+ @Property(name=FstLinkingEngineComponent.ENTITY_CACHE_SIZE,
+ intValue=FstLinkingEngineComponent.DEFAULT_ENTITY_CACHE_SIZE),
+ @Property(name=SUGGESTIONS, intValue=DEFAULT_SUGGESTIONS),
+ @Property(name=INCLUDE_SIMILAR_SCORE, boolValue=DEFAULT_INCLUDE_SIMILAR_SCORE),
+ @Property(name=CASE_SENSITIVE,boolValue=DEFAULT_CASE_SENSITIVE_MATCHING_STATE),
+ @Property(name=DEFAULT_MATCHING_LANGUAGE,value=""),
+ @Property(name=ENTITY_TYPES,cardinality=Integer.MAX_VALUE),
+ @Property(name=TYPE_MAPPINGS,cardinality=Integer.MAX_VALUE, value={
+ "dbp-ont:Organisation; dbp-ont:Newspaper; schema:Organization > dbp-ont:Organisation",
+ "dbp-ont:Person; foaf:Person; schema:Person > dbp-ont:Person",
+ "dbp-ont:Place; schema:Place; geonames:Feature > dbp-ont:Place",
+ "dbp-ont:Work; schema:CreativeWork > dbp-ont:Work",
+ "dbp-ont:Event; schema:Event > dbp-ont:Event",
+ "schema:Product > schema:Product",
+ "skos:Concept > skos:Concept"}),
+ @Property(name=SERVICE_RANKING,intValue=0)
+ })
+public class PlainFstLinkingComponnet extends FstLinkingEngineComponent {
+
+ /**
+ * used to resolve '{prefix}:{local-name}' used within the engines configuration
+ */
+ @Reference(cardinality=ReferenceCardinality.OPTIONAL_UNARY)
+ private NamespacePrefixService prefixService;
+
+
+ @Activate
+ @Override
+ protected void activate(ComponentContext ctx) throws ConfigurationException {
+ log.info("activate {}",getClass().getSimpleName());
+ this.bundleContext = ctx.getBundleContext();
+ super.applyConfig(LinkingModeEnum.PLAIN, ctx.getProperties(), prefixService);
+ }
+
+ @Deactivate
+ @Override
+ protected void deactivate(ComponentContext ctx) {
+ super.deactivate(ctx);
+ }
+}
Propchange: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/PlainFstLinkingComponnet.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java?rev=1674012&r1=1674011&r2=1674012&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java (original)
+++ stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java Thu Apr 16 07:51:12 2015
@@ -24,11 +24,12 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.NavigableMap;
import java.util.Set;
+import java.util.TreeMap;
import org.apache.clerezza.rdf.core.Language;
import org.apache.clerezza.rdf.core.Literal;
-import org.apache.clerezza.rdf.core.Resource;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
import org.apache.commons.lang.StringUtils;
@@ -40,24 +41,17 @@ import org.apache.lucene.document.String
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.queries.function.valuesource.IfFunction;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
import org.apache.stanbol.enhancer.engines.lucenefstlinking.Match.FieldLoader;
import org.apache.stanbol.enhancer.engines.lucenefstlinking.Match.FieldType;
import org.apache.stanbol.enhancer.engines.lucenefstlinking.cache.EntityCache;
-import org.apache.stanbol.enhancer.engines.lucenefstlinking.impl.ValueSourceAccessor;
-import org.apache.stanbol.enhancer.servicesapi.ContentItem;
-import org.apache.stanbol.enhancer.servicesapi.EngineException;
-import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.nlp.ner.NerTag;
import org.opensextant.solrtexttagger.TaggerFstCorpus;
-import org.opensextant.solrtexttagger.UnsupportedTokenException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.google.common.eventbus.AllowConcurrentEvents;
-
/**
* Profile created based on the {@link IndexConfiguration} for processing a
* parsed ContentItem. <p>
@@ -101,6 +95,18 @@ public class TaggingSession implements C
protected final String typeField;
protected final String redirectField;
protected final String rankingField;
+
+ /**
+ * Used in the {@link LinkingModeEnum#NER} to store the {@link NerTag#getTag()}
+ * and {@link NerTag#getType()} values for the span of the Named Entity.<p>
+ * This information is collected by the {@link NamedEntityTokenFilter} while
+ * iterating over the parsed text and is used in the processing of
+ * {@link Tag}s to filter Entities based on their types. <p>
+ * Not used in any linking mode other than <code>NER</code>
+ */
+ protected final NavigableMap<int[],Set<String>> entityMentionTypes =
+ new TreeMap<int[],Set<String>>(Tag.SPAN_COMPARATOR);
+
private final RefCounted<SolrIndexSearcher> searcherRef;
/**
* Document Cache and session statistics for the cache
Modified: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1674012&r1=1674011&r2=1674012&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/resources/OSGI-INF/metatype/metatype.properties (original)
+++ stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/resources/OSGI-INF/metatype/metatype.properties Thu Apr 16 07:51:12 2015
@@ -25,9 +25,28 @@ one with the higher ranking will be used
#Properties specific to the FST linking engine
#===============================================================================
org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngineComponent.name=Apache \
-Stanbol Enhancer Engine: FST Linking
+Stanbol Enhancer Engine: FST Linking: Linkable Token
org.apache.stanbol.enhancer.engines.lucenefstlinking.FstLinkingEngineComponent.description=Lucene \
-FST based Entity Linking Engine implementation.
+FST based Entity Linking Engine that looks up Linkable Tokens in the controlled vocabulary. \
+Typically Proper Nouns (or all Nouns) are considered as linkable. Also Noun Phrases are \
+used to ensure that single word matches are not matched for phrases in the text (e.g. that \
+"university" is not matched with "University of Munich" mentioned in the text).
+
+org.apache.stanbol.enhancer.engines.lucenefstlinking.NamedEntityFstLinkingComponnet.name=Apache \
+Stanbol Enhancer Engine: FST Linking: Named Entities
+org.apache.stanbol.enhancer.engines.lucenefstlinking.NamedEntityFstLinkingComponnet.description=Lucene \
+FST based Entity Linking Engine that looks up Named Entities recognized in the text in the \
+configured controlled vocabulary. This mode supports filtering possible matches in the \
+vocabulary based on the type detected for the Named Entity.
+
+org.apache.stanbol.enhancer.engines.lucenefstlinking.PlainFstLinkingComponnet.name=Apache \
+Stanbol Enhancer Engine: FST Linking: Plain
+org.apache.stanbol.enhancer.engines.lucenefstlinking.PlainFstLinkingComponnet.description=\
+Lucene FST based Entity Linking Engine that operates on the plain text. It does not use \
+(and require) any NLP processing results (other than language detection). The Query time \
+Lucene Analyzer is used to process the parsed text and every token is linked with the \
+controlled vocabulary.
+
enhancer.engines.linking.lucenefst.solrcore.name=Solr Core
enhancer.engines.linking.lucenefst.solrcore.description=The reference to the SolrCore. \
@@ -153,15 +172,23 @@ enhancer.engines.linking.entityTypes.nam
enhancer.engines.linking.entityTypes.description=Allows to define a white/black list \
based on the types of Entities. Use '!{uri}' for black listing and '{uri}' for white \
listing. Include '*' to force white listing (e.g. to allow Entities without any type). \
-Rules are processed based on their oder.
+Rules are processed based on their order. NOTE: Not used in the NER linking mode.
enhancer.engines.linking.lucenefst.mode.name=Linking Mode
enhancer.engines.linking.lucenefst.mode.description=The linking mode allows to switch the \
operation mode of the FST linking engine: PLAIN will link every single word with the \
vocabulary. No NLP processing is required in this mode; LINKABLE_TOKEN will use NLP \
processing results to determine what tokens should be linked (typically all Nouns or \
-only ProperNouns - configurable via the TextProcessing configuration);
-#finally the NER mode will only link Named Entities detected by a NER component.
+only ProperNouns - configurable via the TextProcessing configuration); \
+finally the NER mode will only link Named Entities detected by a NER component.
enhancer.engines.linking.lucenefst.mode.option.plain=Plain
enhancer.engines.linking.lucenefst.mode.option.linkableToken=Linkable Tokens
-#enhancer.engines.linking.lucenefst.mode.option.ner=NER (not yet implemented)
+enhancer.engines.linking.lucenefst.mode.option.ner=NER
+
+enhancer.engines.linking.lucenefst.neTypeMapping.name=Named Entity Type Mappings
+enhancer.engines.linking.lucenefst.neTypeMapping.description=Allows to map Named \
+Entity Tags and Types to Entity types. Syntax: {ne-type} > {entity-type-1}; {entity-type-2}. \
+(e.g. a mapping for the tag "Person" to the type schema:Person - "Person > http://schema.org/Person", \
+a second mapping for the type "dbpedia:Person" to person types of different ontologies \
+"dbpedia:Person > dbpedia:Person; schema:Person; foaf:Person"). \
+NOTE: Only used in the NER linking mode.
Modified: stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/test/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineTest.java
URL: http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/test/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineTest.java?rev=1674012&r1=1674011&r2=1674012&view=diff
==============================================================================
--- stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/test/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineTest.java (original)
+++ stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/test/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineTest.java Thu Apr 16 07:51:12 2015
@@ -301,7 +301,7 @@ public class FstLinkingEngineTest {
elc.setMinFoundTokens(2);//this is assumed by this test
elc.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);
FstLinkingEngine engine = new FstLinkingEngine("proper-noun-linking",
- LinkingModeEnum.LINKABLE_TOKEN, fstConfig, tpc, elc);
+ LinkingModeEnum.LINKABLE_TOKEN, fstConfig, tpc, elc, null);
processConentItem(engine);
validateEnhancements(
Arrays.asList(
@@ -322,7 +322,7 @@ public class FstLinkingEngineTest {
elc.setMinFoundTokens(2);//this is assumed by this test
elc.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);
FstLinkingEngine engine = new FstLinkingEngine("proper-noun-linking",
- LinkingModeEnum.LINKABLE_TOKEN, fstConfig, tpc, elc);
+ LinkingModeEnum.LINKABLE_TOKEN, fstConfig, tpc, elc, null);
processConentItem(engine);
validateEnhancements(
Arrays.asList(